From 28c455ceb2d9fc276839ebeae68115f850b25c6d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 9 Jan 2011 17:52:09 +0100 Subject: [PATCH 001/609] drbd: Get rid of req_validator_fn typedef Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 43beaca5317..d499aa6b7ac 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4270,11 +4270,9 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, return NULL; } -typedef struct drbd_request *(req_validator_fn) - (struct drbd_conf *mdev, u64 id, sector_t sector); - static int validate_req_change_req_state(struct drbd_conf *mdev, - u64 id, sector_t sector, req_validator_fn validator, + u64 id, sector_t sector, + struct drbd_request *(*validator)(struct drbd_conf *, u64, sector_t), const char *func, enum drbd_req_event what) { struct drbd_request *req; From 3980485361f5f71e559c6b8868bb5a1c41171407 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 12:25:21 +0100 Subject: [PATCH 002/609] drbd: Remove superfluous declaration Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ef2ceed3be4..88b247eac34 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -689,7 +689,6 @@ struct drbd_work { drbd_work_cb cb; }; -struct drbd_tl_epoch; struct drbd_request { struct drbd_work w; struct drbd_conf *mdev; From 9a8e77530fa7059044114bcf1a897a470ec21bc9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 14:04:09 +0100 Subject: [PATCH 003/609] drbd: Consistently use block_id == ID_SYNCER for checksum based resync and online verify DRBD_MAGIC has nothing 
to do with block ids and the funny values computed were not actually used, anyway. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 4 ++++ drivers/block/drbd/drbd_worker.c | 4 +--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0358e55356c..0c16620ecec 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2474,7 +2474,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, struct p_block_req p; p.sector = cpu_to_be64(sector); - p.block_id = BE_DRBD_MAGIC + 0xbeef; + p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); p.head.magic = BE_DRBD_MAGIC; @@ -2497,7 +2497,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) struct p_block_req p; p.sector = cpu_to_be64(sector); - p.block_id = BE_DRBD_MAGIC + 0xbabe; + p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d499aa6b7ac..5ed96198538 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -341,6 +341,10 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->size = data_size; e->flags = 0; e->sector = sector; + /* + * The block_id is opaque to the receiver. It is not endianness + * converted, and sent back to the sender unchanged. 
+ */ e->block_id = id; return e; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4d3e6f6213b..10438c41f55 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -305,8 +305,6 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) void *digest; int ok = 1; - D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); - if (unlikely(cancel)) goto out; @@ -359,7 +357,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. */ - e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); + e = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); if (!e) goto defer; From ca9bc12b90fbc4e2b1f81360f63842c9da54bb3c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 13:47:24 +0100 Subject: [PATCH 004/609] drbd: Get rid of BE_DRBD_MAGIC and BE_DRBD_MAGIC_BIG Converting the constants happens at compile time. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 14 +++++++------- drivers/block/drbd/drbd_receiver.c | 8 ++++---- include/linux/drbd.h | 2 -- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0c16620ecec..2cd132a91b8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1841,7 +1841,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, ERR_IF(!h) return false; ERR_IF(!size) return false; - h->magic = BE_DRBD_MAGIC; + h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size-sizeof(struct p_header80)); @@ -1889,7 +1889,7 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, struct p_header80 h; int ok; - h.magic = BE_DRBD_MAGIC; + h.magic = cpu_to_be32(DRBD_MAGIC); h.command = cpu_to_be16(cmd); h.length = cpu_to_be16(size); @@ -2477,7 +2477,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.magic = BE_DRBD_MAGIC; + p.head.magic = cpu_to_be32(DRBD_MAGIC); p.head.command = cpu_to_be16(cmd); p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); @@ -2682,12 +2682,12 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (req->size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(P_DATA); p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size); } else { - p.head.h95.magic = BE_DRBD_MAGIC_BIG; + p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(P_DATA); p.head.h95.length = cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size); @@ -2767,12 +2767,12 @@ int drbd_send_block(struct 
drbd_conf *mdev, enum drbd_packets cmd, crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (e->size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(cmd); p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); } else { - p.head.h95.magic = BE_DRBD_MAGIC_BIG; + p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(cmd); p.head.h95.length = cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5ed96198538..69eec6980c2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -712,7 +712,7 @@ static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *soc rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); - if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC) + if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC)) return be16_to_cpu(h->command); return 0xffff; @@ -935,10 +935,10 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi return false; } - if (likely(h->h80.magic == BE_DRBD_MAGIC)) { + if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); - } else if (h->h95.magic == BE_DRBD_MAGIC_BIG) { + } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { *cmd = be16_to_cpu(h->h95.command); *packet_size = be32_to_cpu(h->h95.length); } else { @@ -4623,7 +4623,7 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (unlikely(h->magic != BE_DRBD_MAGIC)) { + if (unlikely(h->magic != cpu_to_be32(DRBD_MAGIC))) { dev_err(DEV, "magic?? 
on meta m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->magic), be16_to_cpu(h->command), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9e5f5607eba..d2820281167 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -334,9 +334,7 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) #define DRBD_MAGIC 0x83740267 -#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) #define DRBD_MAGIC_BIG 0x835a -#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 From e7fad8af750c5780143e4b6876f80042ec0c21f5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 13:54:02 +0100 Subject: [PATCH 005/609] drbd: Endianness convert the constants instead of the variables Converting the constants happens at compile time. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index cf0e63dd97d..0eb17d3adf2 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -407,7 +407,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; - rv = (be32_to_cpu(b->magic) == DRBD_MAGIC); + rv = (b->magic == cpu_to_be32(DRBD_MAGIC)); for (i = 0; i < AL_EXTENTS_PT + 1; i++) xor_sum ^= be32_to_cpu(b->updates[i].extent); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2cd132a91b8..f65b8c53224 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3729,7 +3729,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } - if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) { + if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) { dev_err(DEV, "Error while reading metadata, magic not 
found.\n"); rv = ERR_MD_INVALID; goto err; From 579b57ed730819970a3542b4bbcc2d4176f25c72 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 13 Jan 2011 18:40:57 +0100 Subject: [PATCH 006/609] drbd: Magic reserved block_id value cleanup The ID_VACANT definition has become entirely irrelevant by now. The is_syncer_block_id() macro does not improve the code, so eliminate it. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 +-------- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 14 ++++---------- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 88b247eac34..c1d175514aa 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -87,17 +87,10 @@ extern char usermode_helper[]; */ #define DRBD_SIGKILL SIGHUP -/* All EEs on the free list should have ID_VACANT (== 0) - * freshly allocated EEs get !ID_VACANT (== 1) - * so if it says "cannot dereference null pointer at address 0x00000001", - * it is most likely one of these :( */ - #define ID_IN_SYNC (4711ULL) #define ID_OUT_OF_SYNC (4712ULL) - #define ID_SYNCER (-1ULL) -#define ID_VACANT 0 -#define is_syncer_block_id(id) ((id) == ID_SYNCER) + #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) struct drbd_conf; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 69eec6980c2..efe141eb521 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4308,7 +4308,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - if (is_syncer_block_id(p->block_id)) { + if (p->block_id == ID_SYNCER) { drbd_set_in_sync(mdev, sector, blksize); dec_rs_pending(mdev); return true; @@ -4349,7 +4349,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) update_peer_seq(mdev,
be32_to_cpu(p->seq_num)); - if (is_syncer_block_id(p->block_id)) { + if (p->block_id == ID_SYNCER) { dec_rs_pending(mdev); drbd_rs_failed_io(mdev, sector, size); return true; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 10438c41f55..43a9fefd29b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -85,8 +85,6 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; - D_ASSERT(e->block_id != ID_VACANT); - spin_lock_irqsave(&mdev->req_lock, flags); mdev->read_cnt += e->size >> 9; list_del(&e->w.list); @@ -108,18 +106,16 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo struct drbd_conf *mdev = e->mdev; sector_t e_sector; int do_wake; - int is_syncer_req; + u64 block_id; int do_al_complete_io; - D_ASSERT(e->block_id != ID_VACANT); - /* after we moved e to done_ee, * we may no longer access it, * it may be freed/reused already! * (as soon as we release the req_lock) */ e_sector = e->sector; do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; - is_syncer_req = is_syncer_block_id(e->block_id); + block_id = e->block_id; spin_lock_irqsave(&mdev->req_lock, flags); mdev->writ_cnt += e->size >> 9; @@ -131,15 +127,13 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo * done from "drbd_process_done_ee" within the appropriate w.cb * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ - do_wake = is_syncer_req - ? list_empty(&mdev->sync_ee) - : list_empty(&mdev->active_ee); + do_wake = list_empty(block_id == ID_SYNCER ? 
&mdev->sync_ee : &mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &e->flags)) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->req_lock, flags); - if (is_syncer_req) + if (block_id == ID_SYNCER) drbd_rs_complete_io(mdev, e_sector); if (do_wake) From d628769b3c6b4ddafea358944ef1f106fccfaaff Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 13 Jan 2011 23:05:39 +0100 Subject: [PATCH 007/609] drbd: Move drbd_free_tl_hash() to drbd_main() This is the only place where this function is used. Make it static. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 30 ++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_receiver.c | 30 ------------------------------ 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c1d175514aa..c6d8200b4b5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1592,7 +1592,6 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); extern void drbd_flush_workqueue(struct drbd_conf *mdev); -extern void drbd_free_tl_hash(struct drbd_conf *mdev); /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. 
*/ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f65b8c53224..eecbfc8f897 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -228,6 +228,36 @@ static void tl_cleanup(struct drbd_conf *mdev) mdev->tl_hash_s = 0; } +static void drbd_free_tl_hash(struct drbd_conf *mdev) +{ + struct hlist_head *h; + + spin_lock_irq(&mdev->req_lock); + + if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) { + spin_unlock_irq(&mdev->req_lock); + return; + } + /* paranoia code */ + for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", + (int)(h - mdev->ee_hash), h->first); + kfree(mdev->ee_hash); + mdev->ee_hash = NULL; + mdev->ee_hash_s = 0; + + /* paranoia code */ + for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", + (int)(h - mdev->tl_hash), h->first); + kfree(mdev->tl_hash); + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; + spin_unlock_irq(&mdev->req_lock); +} + /** * _tl_add_barrier() - Adds a barrier to the transfer log * @mdev: DRBD device. 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index efe141eb521..bafc233ef3f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3740,36 +3740,6 @@ void drbd_flush_workqueue(struct drbd_conf *mdev) wait_for_completion(&barr.done); } -void drbd_free_tl_hash(struct drbd_conf *mdev) -{ - struct hlist_head *h; - - spin_lock_irq(&mdev->req_lock); - - if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) { - spin_unlock_irq(&mdev->req_lock); - return; - } - /* paranoia code */ - for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", - (int)(h - mdev->ee_hash), h->first); - kfree(mdev->ee_hash); - mdev->ee_hash = NULL; - mdev->ee_hash_s = 0; - - /* paranoia code */ - for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", - (int)(h - mdev->tl_hash), h->first); - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - spin_unlock_irq(&mdev->req_lock); -} - static void drbd_disconnect(struct drbd_conf *mdev) { enum drbd_fencing_p fp; From 9c50842a35420f9c8fde9da626a9c0cad456becc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 14 Jan 2011 21:19:36 +0100 Subject: [PATCH 008/609] drbd: Update outdated comment Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bafc233ef3f..26810ce5d1e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1512,7 +1512,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un if (get_ldev(mdev)) { /* data is submitted to disk within recv_resync_read. 
* corresponding put_ldev done below on error, - * or in drbd_endio_write_sec. */ + * or in drbd_endio_sec. */ ok = recv_resync_read(mdev, sector, data_size); } else { if (__ratelimit(&drbd_ratelimit_state)) @@ -1673,7 +1673,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* get_ldev(mdev) successful. * Corresponding put_ldev done either below (on various errors), - * or in drbd_endio_write_sec, if we successfully submit the data at + * or in drbd_endio_sec, if we successfully submit the data at * the end of this function. */ sector = be64_to_cpu(p->sector); From 516245856456db591c5336a90584077545e772b1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 5 Jan 2011 23:27:02 +0100 Subject: [PATCH 009/609] drbd: Request lookup code cleanup (1) Move _ar_id_to_req() to drbd_receiver.c and mark it non-inline. Remove the leading underscores from _ar_id_to_req() and _ack_id_to_req(). Mark ar_hash_slot() inline. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++++++++++++++------- drivers/block/drbd/drbd_req.h | 21 ++------------------ 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 26810ce5d1e..1684c4809a9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1469,6 +1469,24 @@ fail: return false; } +/* when we receive the answer for a read request, + * verify that we actually know about it */ +static struct drbd_request *ar_id_to_req(struct drbd_conf *mdev, u64 id, + sector_t sector) +{ + struct hlist_head *slot = ar_hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, collision) { + if ((unsigned long)req == (unsigned long)id) { + D_ASSERT(req->sector == sector); + return req; + } + } + return NULL; +} + static int receive_DataReply(struct drbd_conf *mdev, enum 
drbd_packets cmd, unsigned int data_size) { struct drbd_request *req; @@ -1479,7 +1497,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = _ar_id_to_req(mdev, p->block_id, sector); + req = ar_id_to_req(mdev, p->block_id, sector); spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) { dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); @@ -4222,8 +4240,8 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) /* when we receive the ACK for a write request, * verify that we actually know about it */ -static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, - u64 id, sector_t sector) +static struct drbd_request *ack_id_to_req(struct drbd_conf *mdev, u64 id, + sector_t sector) { struct hlist_head *slot = tl_hash_slot(mdev, sector); struct hlist_node *n; @@ -4232,7 +4250,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { if (req->sector != sector) { - dev_err(DEV, "_ack_id_to_req: found req %p but it has " + dev_err(DEV, "ack_id_to_req: found req %p but it has " "wrong sector (%llus versus %llus)\n", req, (unsigned long long)req->sector, (unsigned long long)sector); @@ -4306,7 +4324,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - _ack_id_to_req, __func__ , what); + ack_id_to_req, __func__, what); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4326,7 +4344,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) } spin_lock_irq(&mdev->req_lock); - req = _ack_id_to_req(mdev, p->block_id, sector); + req = ack_id_to_req(mdev, p->block_id, sector); if (!req) { spin_unlock_irq(&mdev->req_lock); if (mdev->net_conf->wire_protocol == DRBD_PROT_A || @@ -4363,7 +4381,7 @@ static int 
got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - _ar_id_to_req, __func__ , neg_acked); + ar_id_to_req, __func__ , neg_acked); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 68a234a5fdc..a773636cca9 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -241,30 +241,13 @@ struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) } /* application reads (drbd_request objects) */ -static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) +static inline +struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) { return mdev->app_reads_hash + ((unsigned int)(sector) % APP_R_HSIZE); } -/* when we receive the answer for a read request, - * verify that we actually know about it */ -static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, - u64 id, sector_t sector) -{ - struct hlist_head *slot = ar_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req == (unsigned long)id) { - D_ASSERT(req->sector == sector); - return req; - } - } - return NULL; -} - static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) { struct bio *bio; From 668eebc6a10ba146db6b8257b9938121d1f3a06a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 17:14:26 +0100 Subject: [PATCH 010/609] drbd: Request lookup code cleanup (2) Unify the ar_id_to_req() and ack_id_to_req() functions: make both fail if the consistency check fails. Move the request lookup code now duplicated in both functions into its own function. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 55 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1684c4809a9..ae32aed441a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1469,22 +1469,37 @@ fail: return false; } +static struct drbd_request * +find_request(struct drbd_conf *mdev, + struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), + u64 id, sector_t sector, const char *func) +{ + struct hlist_head *slot = hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, collision) { + if ((unsigned long)req != (unsigned long)id) + continue; + if (req->sector != sector) { + dev_err(DEV, "%s: found request %lu but it has " + "wrong sector (%llus versus %llus)\n", + func, (unsigned long)req, + (unsigned long long)req->sector, + (unsigned long long)sector); + break; + } + return req; + } + return NULL; +} + /* when we receive the answer for a read request, * verify that we actually know about it */ static struct drbd_request *ar_id_to_req(struct drbd_conf *mdev, u64 id, sector_t sector) { - struct hlist_head *slot = ar_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req == (unsigned long)id) { - D_ASSERT(req->sector == sector); - return req; - } - } - return NULL; + return find_request(mdev, ar_hash_slot, id, sector, __func__); } static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) @@ -4243,23 +4258,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) static struct drbd_request *ack_id_to_req(struct drbd_conf *mdev, u64 id, sector_t sector) { - struct hlist_head *slot = tl_hash_slot(mdev, sector); - struct hlist_node 
*n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req == (unsigned long)id) { - if (req->sector != sector) { - dev_err(DEV, "ack_id_to_req: found req %p but it has " - "wrong sector (%llus versus %llus)\n", req, - (unsigned long long)req->sector, - (unsigned long long)sector); - break; - } - return req; - } - } - return NULL; + return find_request(mdev, tl_hash_slot, id, sector, __func__); } static int validate_req_change_req_state(struct drbd_conf *mdev, From ae3388daaec96cc53d6d02cae0d8b744a6b9ca5c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 17:23:59 +0100 Subject: [PATCH 011/609] drbd: Request lookup code cleanup (3) Get rid of the ar_id_to_req() and ack_id_to_req() wrappers. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ae32aed441a..84c8d94a9d0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1494,14 +1494,6 @@ find_request(struct drbd_conf *mdev, return NULL; } -/* when we receive the answer for a read request, - * verify that we actually know about it */ -static struct drbd_request *ar_id_to_req(struct drbd_conf *mdev, u64 id, - sector_t sector) -{ - return find_request(mdev, ar_hash_slot, id, sector, __func__); -} - static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { struct drbd_request *req; @@ -1512,7 +1504,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = ar_id_to_req(mdev, p->block_id, sector); + req = find_request(mdev, ar_hash_slot, p->block_id, sector, __func__); spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) { dev_err(DEV, "Got a corrupt 
block_id/sector pair(1).\n"); @@ -4253,24 +4245,16 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) return true; } -/* when we receive the ACK for a write request, - * verify that we actually know about it */ -static struct drbd_request *ack_id_to_req(struct drbd_conf *mdev, u64 id, - sector_t sector) -{ - return find_request(mdev, tl_hash_slot, id, sector, __func__); -} - static int validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, - struct drbd_request *(*validator)(struct drbd_conf *, u64, sector_t), + struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), const char *func, enum drbd_req_event what) { struct drbd_request *req; struct bio_and_error m; spin_lock_irq(&mdev->req_lock); - req = validator(mdev, id, sector); + req = find_request(mdev, hash_slot, id, sector, func); if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); @@ -4323,7 +4307,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - ack_id_to_req, __func__, what); + tl_hash_slot, __func__, what); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4343,7 +4327,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) } spin_lock_irq(&mdev->req_lock); - req = ack_id_to_req(mdev, p->block_id, sector); + req = find_request(mdev, tl_hash_slot, p->block_id, sector, __func__); if (!req) { spin_unlock_irq(&mdev->req_lock); if (mdev->net_conf->wire_protocol == DRBD_PROT_A || @@ -4380,7 +4364,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - ar_id_to_req, __func__ , neg_acked); + ar_hash_slot, __func__, neg_acked); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) From c3afd8f568999e974382f7b5b05267c018056016 Mon Sep 17 00:00:00 2001 From: Andreas 
Gruenbacher Date: Thu, 20 Jan 2011 22:25:40 +0100 Subject: [PATCH 012/609] drbd: Request lookup code cleanup (4) Factor out duplicate code in got_NegAck(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 66 +++++++++++++----------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 84c8d94a9d0..8e7875e7260 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1472,7 +1472,7 @@ fail: static struct drbd_request * find_request(struct drbd_conf *mdev, struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - u64 id, sector_t sector, const char *func) + u64 id, sector_t sector, bool missing_ok, const char *func) { struct hlist_head *slot = hash_slot(mdev, sector); struct hlist_node *n; @@ -1487,10 +1487,14 @@ find_request(struct drbd_conf *mdev, func, (unsigned long)req, (unsigned long long)req->sector, (unsigned long long)sector); - break; + return NULL; } return req; } + if (!missing_ok) { + dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func, + (unsigned long)id, (unsigned long long)sector); + } return NULL; } @@ -1504,12 +1508,10 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, ar_hash_slot, p->block_id, sector, __func__); + req = find_request(mdev, ar_hash_slot, p->block_id, sector, false, __func__); spin_unlock_irq(&mdev->req_lock); - if (unlikely(!req)) { - dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); + if (unlikely(!req)) return false; - } /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. 
@@ -4248,18 +4250,15 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) static int validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - const char *func, enum drbd_req_event what) + const char *func, enum drbd_req_event what, bool missing_ok) { struct drbd_request *req; struct bio_and_error m; spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, hash_slot, id, sector, func); + req = find_request(mdev, hash_slot, id, sector, missing_ok, func); if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); - - dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func, - (void *)(unsigned long)id, (unsigned long long)sector); return false; } __req_mod(req, what, &m); @@ -4307,7 +4306,8 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - tl_hash_slot, __func__, what); + tl_hash_slot, __func__, what, + false); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4315,8 +4315,9 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); - struct drbd_request *req; - struct bio_and_error m; + bool missing_ok = mdev->net_conf->wire_protocol == DRBD_PROT_A || + mdev->net_conf->wire_protocol == DRBD_PROT_B; + bool found; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4326,31 +4327,19 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } - spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, tl_hash_slot, p->block_id, sector, __func__); - if (!req) { - spin_unlock_irq(&mdev->req_lock); - if (mdev->net_conf->wire_protocol == DRBD_PROT_A || - mdev->net_conf->wire_protocol == DRBD_PROT_B) { - /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 
- The master bio might already be completed, therefore the - request is no longer in the collision hash. - => Do not try to validate block_id as request. */ - /* In Protocol B we might already have got a P_RECV_ACK - but then get a P_NEG_ACK after wards. */ - drbd_set_out_of_sync(mdev, sector, size); - return true; - } else { - dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__, - (void *)(unsigned long)p->block_id, (unsigned long long)sector); + found = validate_req_change_req_state(mdev, p->block_id, sector, + tl_hash_slot, __func__, + neg_acked, missing_ok); + if (!found) { + /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. + The master bio might already be completed, therefore the + request is no longer in the collision hash. */ + /* In Protocol B we might already have got a P_RECV_ACK + but then get a P_NEG_ACK afterwards. */ + if (!missing_ok) return false; - } + drbd_set_out_of_sync(mdev, sector, size); } - __req_mod(req, neg_acked, &m); - spin_unlock_irq(&mdev->req_lock); - - if (m.bio) - complete_master_bio(mdev, &m); return true; } @@ -4364,7 +4353,8 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - ar_hash_slot, __func__, neg_acked); + ar_hash_slot, __func__, neg_acked, + false); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) From 0939b0e5cdeeafa0adf0150edd350092e47acc49 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 3 Jan 2011 17:42:00 +0100 Subject: [PATCH 013/609] drbd: Add interval tree data structure Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Makefile | 1 + drivers/block/drbd/drbd_interval.c | 156 +++++++++++++++++++++++++++++ drivers/block/drbd/drbd_interval.h | 31 ++++++ 3 files changed, 188 insertions(+) create mode 100644 drivers/block/drbd/drbd_interval.c create mode 100644 
drivers/block/drbd/drbd_interval.h diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 0d3f337ff5f..cacbb04f285 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,5 +1,6 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o +drbd-y += drbd_interval.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c new file mode 100644 index 00000000000..2511dd9993f --- /dev/null +++ b/drivers/block/drbd/drbd_interval.c @@ -0,0 +1,156 @@ +#include "drbd_interval.h" + +/** + * interval_end - return end of @node + */ +static inline +sector_t interval_end(struct rb_node *node) +{ + struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb); + return this->end; +} + +/** + * update_interval_end - recompute end of @node + * + * The end of an interval is the highest (start + (size >> 9)) value of this + * node and of its children. Called for @node and its parents whenever the end + * may have changed. 
+ */ +static void +update_interval_end(struct rb_node *node, void *__unused) +{ + struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb); + sector_t end; + + end = this->sector + (this->size >> 9); + if (node->rb_left) { + sector_t left = interval_end(node->rb_left); + if (left > end) + end = left; + } + if (node->rb_right) { + sector_t right = interval_end(node->rb_right); + if (right > end) + end = right; + } + this->end = end; +} + +/** + * drbd_insert_interval - insert a new interval into a tree + */ +bool +drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) +{ + struct rb_node **new = &root->rb_node, *parent = NULL; + + BUG_ON(!IS_ALIGNED(this->size, 512)); + + while (*new) { + struct drbd_interval *here = + rb_entry(*new, struct drbd_interval, rb); + + parent = *new; + if (this->sector < here->sector) + new = &(*new)->rb_left; + else if (this->sector > here->sector) + new = &(*new)->rb_right; + else if (this < here) + new = &(*new)->rb_left; + else if (this > here) + new = &(*new)->rb_right; + else + return false; + } + + rb_link_node(&this->rb, parent, new); + rb_insert_color(&this->rb, root); + rb_augment_insert(&this->rb, update_interval_end, NULL); + return true; +} + +/** + * drbd_contains_interval - check if a tree contains a given interval + * @sector: start sector of @interval + * @interval: may not be a valid pointer + * + * Returns if the tree contains the node @interval with start sector @start. + * Does not dereference @interval until @interval is known to be a valid object + * in @tree. Returns %false if @interval is in the tree but with a different + * sector number.
+ */ +bool +drbd_contains_interval(struct rb_root *root, sector_t sector, + struct drbd_interval *interval) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct drbd_interval *here = + rb_entry(node, struct drbd_interval, rb); + + if (sector < here->sector) + node = node->rb_left; + else if (sector > here->sector) + node = node->rb_right; + else if (interval < here) + node = node->rb_left; + else if (interval > here) + node = node->rb_right; + else + return interval->sector == sector; + } + return false; +} + +/** + * drbd_remove_interval - remove an interval from a tree + */ +void +drbd_remove_interval(struct rb_root *root, struct drbd_interval *this) +{ + struct rb_node *deepest; + + deepest = rb_augment_erase_begin(&this->rb); + rb_erase(&this->rb, root); + rb_augment_erase_end(deepest, update_interval_end, NULL); +} + +/** + * drbd_find_overlap - search for an interval overlapping with [sector, sector + size) + * @sector: start sector + * @size: size, aligned to 512 bytes + * + * Returns the interval overlapping with [sector, sector + size), or NULL. + * When there is more than one overlapping interval in the tree, the interval + * with the lowest start sector is returned. 
+ */ +struct drbd_interval * +drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) +{ + struct rb_node *node = root->rb_node; + struct drbd_interval *overlap = NULL; + sector_t end = sector + (size >> 9); + + BUG_ON(!IS_ALIGNED(size, 512)); + + while (node) { + struct drbd_interval *here = + rb_entry(node, struct drbd_interval, rb); + + if (node->rb_left && + sector < interval_end(node->rb_left)) { + /* Overlap if any must be on left side */ + node = node->rb_left; + } else if (here->sector < end && + sector < here->sector + (here->size >> 9)) { + overlap = here; + break; + } else if (sector >= here->sector) { + /* Overlap if any must be on right side */ + node = node->rb_right; + } else + break; + } + return overlap; +} diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h new file mode 100644 index 00000000000..bf8dcf7bab0 --- /dev/null +++ b/drivers/block/drbd/drbd_interval.h @@ -0,0 +1,31 @@ +#ifndef __DRBD_INTERVAL_H +#define __DRBD_INTERVAL_H + +#include <linux/types.h> +#include <linux/rbtree.h> + +struct drbd_interval { + struct rb_node rb; + sector_t sector; /* start sector of the interval */ + unsigned int size; /* size in bytes */ + sector_t end; /* highest interval end in subtree */ +}; + +static inline void drbd_clear_interval(struct drbd_interval *i) +{ + RB_CLEAR_NODE(&i->rb); +} + +static inline bool drbd_interval_empty(struct drbd_interval *i) +{ + return RB_EMPTY_NODE(&i->rb); +} + +bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); +struct drbd_interval *drbd_find_interval(struct rb_root *, sector_t, + struct drbd_interval *); +void drbd_remove_interval(struct rb_root *, struct drbd_interval *); +struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, + unsigned int); + +#endif /* __DRBD_INTERVAL_H */ From ace652acf2d7e564dac48c615d9184e7ed575f9c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 3 Jan 2011 17:09:58 +0100 Subject: [PATCH 014/609] drbd: Put sector and size in struct
drbd_request into struct drbd_interval Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 ++-- drivers/block/drbd/drbd_main.c | 14 ++++----- drivers/block/drbd/drbd_receiver.c | 8 ++--- drivers/block/drbd/drbd_req.c | 48 +++++++++++++++--------------- drivers/block/drbd/drbd_req.h | 4 +-- drivers/block/drbd/drbd_worker.c | 4 +-- 6 files changed, 42 insertions(+), 41 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c6d8200b4b5..d7678e85031 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -682,6 +682,8 @@ struct drbd_work { drbd_work_cb cb; }; +#include "drbd_interval.h" + struct drbd_request { struct drbd_work w; struct drbd_conf *mdev; @@ -693,8 +695,7 @@ struct drbd_request { struct bio *private_bio; struct hlist_node collision; - sector_t sector; - unsigned int size; + struct drbd_interval i; unsigned int epoch; /* barrier_nr */ /* barrier_nr: used to check on "completion" whether this req was in diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index eecbfc8f897..a77b4bfd452 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2711,19 +2711,19 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 
crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - if (req->size <= DRBD_MAX_SIZE_H80_PACKET) { + if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(P_DATA); p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size); + cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); } else { p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(P_DATA); p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size); + cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); } - p.sector = cpu_to_be64(req->sector); + p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); @@ -2769,7 +2769,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) if (memcmp(mdev->int_dig_out, digest, dgs)) { dev_warn(DEV, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", - (unsigned long long)req->sector, req->size); + (unsigned long long)req->i.sector, req->i.size); } } /* else if (dgs > 64) { ... Be noisy about digest too large ... 
@@ -2837,8 +2837,8 @@ int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) { struct p_block_desc p; - p.sector = cpu_to_be64(req->sector); - p.blksize = cpu_to_be32(req->size); + p.sector = cpu_to_be64(req->i.sector); + p.blksize = cpu_to_be32(req->i.size); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p)); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8e7875e7260..6bb1a2f2a38 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1481,11 +1481,11 @@ find_request(struct drbd_conf *mdev, hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req != (unsigned long)id) continue; - if (req->sector != sector) { + if (req->i.sector != sector) { dev_err(DEV, "%s: found request %lu but it has " "wrong sector (%llus versus %llus)\n", func, (unsigned long)req, - (unsigned long long)req->sector, + (unsigned long long)req->i.sector, (unsigned long long)sector); return NULL; } @@ -1783,7 +1783,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); -#define OVERLAPS overlaps(i->sector, i->size, sector, size) +#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) slot = tl_hash_slot(mdev, sector); first = 1; for (;;) { @@ -1800,7 +1800,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned " new: %llus +%u; pending: %llus +%u\n", current->comm, current->pid, (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); + (unsigned long long)i->i.sector, i->i.size); if (i->rq_state & RQ_NET_PENDING) ++have_unacked; ++have_conflict; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3424d675b76..1af11a198b5 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -77,10 +77,10 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request 
*req, const * Other places where we set out-of-sync: * READ with local io-error */ if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(mdev, req->sector, req->size); + drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - drbd_set_in_sync(mdev, req->sector, req->size); + drbd_set_in_sync(mdev, req->i.sector, req->i.size); /* one might be tempted to move the drbd_al_complete_io * to the local io completion callback drbd_endio_pri. @@ -95,12 +95,12 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const if (s & RQ_LOCAL_MASK) { if (get_ldev_if_state(mdev, D_FAILED)) { if (s & RQ_IN_ACT_LOG) - drbd_al_complete_io(mdev, req->sector); + drbd_al_complete_io(mdev, req->i.sector); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " "but my Disk seems to have failed :(\n", - (unsigned long long) req->sector); + (unsigned long long) req->i.sector); } } } @@ -155,20 +155,20 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * if we have the ee_hash (two_primaries) and * this has been on the network */ if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { - const sector_t sector = req->sector; - const int size = req->size; + const sector_t sector = req->i.sector; + const int size = req->i.size; /* ASSERT: * there must be no conflicting requests, since * they must have been failed on the spot */ -#define OVERLAPS overlaps(sector, size, i->sector, i->size) +#define OVERLAPS overlaps(sector, size, i->i.sector, i->i.size) slot = tl_hash_slot(mdev, sector); hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " "other: %p %llus +%u\n", req, (unsigned long long)sector, size, - i, (unsigned long long)i->sector, i->size); + i, (unsigned long long)i->i.sector, i->i.size); } } @@ -186,7 +186,7 @@ static void 
_about_to_complete_local_write(struct drbd_conf *mdev, * we just have to do a wake_up. */ #undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) - slot = ee_hash_slot(mdev, req->sector); + slot = ee_hash_slot(mdev, req->i.sector); hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { wake_up(&mdev->misc_wait); @@ -322,8 +322,8 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e static int _req_conflicts(struct drbd_request *req) { struct drbd_conf *mdev = req->mdev; - const sector_t sector = req->sector; - const int size = req->size; + const sector_t sector = req->i.sector; + const int size = req->i.size; struct drbd_request *i; struct drbd_epoch_entry *e; struct hlist_node *n; @@ -339,7 +339,7 @@ static int _req_conflicts(struct drbd_request *req) goto out_no_conflict; BUG_ON(mdev->tl_hash == NULL); -#define OVERLAPS overlaps(i->sector, i->size, sector, size) +#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) slot = tl_hash_slot(mdev, sector); hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { @@ -348,7 +348,7 @@ static int _req_conflicts(struct drbd_request *req) "pending: %llus +%u\n", current->comm, current->pid, (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); + (unsigned long long)i->i.sector, i->i.size); goto out_conflict; } } @@ -430,9 +430,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case completed_ok: if (bio_data_dir(req->master_bio) == WRITE) - mdev->writ_cnt += req->size>>9; + mdev->writ_cnt += req->i.size >> 9; else - mdev->read_cnt += req->size>>9; + mdev->read_cnt += req->i.size >> 9; req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); req->rq_state &= ~RQ_LOCAL_PENDING; @@ -459,7 +459,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case read_completed_with_error: - drbd_set_out_of_sync(mdev, req->sector, req->size); + drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); 
req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, ar_hash_slot(mdev, req->i.sector)); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); /* corresponding hlist_del is in _req_may_be_done() */ /* NOTE @@ -572,7 +572,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case handed_over_to_network: /* assert something? */ if (bio_data_dir(req->master_bio) == WRITE) - atomic_add(req->size>>9, &mdev->ap_in_flight); + atomic_add(req->i.size >> 9, &mdev->ap_in_flight); if (bio_data_dir(req->master_bio) == WRITE && mdev->net_conf->wire_protocol == DRBD_PROT_A) { @@ -608,7 +608,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); /* if it is still queued, we may not complete it here. * it will be canceled soon. */ @@ -625,7 +625,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (what == conflict_discarded_by_peer) dev_alert(DEV, "Got DiscardAck packet %llus +%u!" 
" DRBD is not a random data generator!\n", - (unsigned long long)req->sector, req->size); + (unsigned long long)req->i.sector, req->i.size); req->rq_state |= RQ_NET_DONE; /* fall through */ case write_acked_by_peer: @@ -647,7 +647,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_NET_OK; D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); req->rq_state &= ~RQ_NET_PENDING; _req_may_be_done_not_susp(req, m); break; @@ -656,7 +656,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ if (req->rq_state & RQ_NET_PENDING) { dec_ap_pending(mdev); - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); } req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); @@ -715,7 +715,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; if (mdev->net_conf->wire_protocol == DRBD_PROT_A) - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); } _req_may_be_done(req, m); /* Allowed while state.susp */ break; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index a773636cca9..2520186c4c2 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -272,8 +272,8 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->mdev = mdev; req->master_bio = bio_src; req->epoch = 0; - req->sector = bio_src->bi_sector; - req->size = bio_src->bi_size; + req->i.sector = bio_src->bi_sector; + req->i.size = bio_src->bi_size; INIT_HLIST_NODE(&req->collision); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 43a9fefd29b..a1eff6e9c0e 100644 --- 
a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1288,7 +1288,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } - ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size, + ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); if (!ok) { @@ -1307,7 +1307,7 @@ int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel) struct drbd_request *req = container_of(w, struct drbd_request, w); if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) - drbd_al_begin_io(mdev, req->sector); + drbd_al_begin_io(mdev, req->i.sector); /* Calling drbd_al_begin_io() out of the worker might deadlocks theoretically. Practically it can not deadlock, since this is only used when unfreezing IOs. All the extents of the requests From de696716e8c40475d259fb49b3876ca0d9415970 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 15:00:24 +0100 Subject: [PATCH 015/609] drbd: Use interval tree for overlapping write request detection Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++ drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 38 ++++++++++---------- drivers/block/drbd/drbd_req.c | 56 ++++++++++++++++-------------- drivers/block/drbd/drbd_req.h | 1 + 5 files changed, 52 insertions(+), 47 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d7678e85031..058371318da 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1019,6 +1019,9 @@ struct drbd_conf { struct hlist_head *tl_hash; unsigned int tl_hash_s; + /* Interval tree of pending local write requests */ + struct rb_root write_requests; + /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; /* number of resync blocks that failed in this run */ diff --git 
a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a77b4bfd452..4d85838f53e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3473,6 +3473,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* no need to lock access, we are still initializing this minor device. */ if (!tl_init(mdev)) goto out_no_tl; + mdev->write_requests = RB_ROOT; mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); if (!mdev->app_reads_hash) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6bb1a2f2a38..6b072584250 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1733,9 +1733,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned const int size = e->size; const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); DEFINE_WAIT(wait); - struct drbd_request *i; - struct hlist_node *n; - struct hlist_head *slot; int first; D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); @@ -1783,30 +1780,31 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); -#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) - slot = tl_hash_slot(mdev, sector); first = 1; for (;;) { + struct drbd_interval *i; int have_unacked = 0; int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - hlist_for_each_entry(i, n, slot, collision) { - if (OVERLAPS) { - /* only ALERT on first iteration, - * we may be woken up early... */ - if (first) - dev_alert(DEV, "%s[%u] Concurrent local write detected!" 
- " new: %llus +%u; pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->i.sector, i->i.size); - if (i->rq_state & RQ_NET_PENDING) - ++have_unacked; - ++have_conflict; - } + + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (i) { + struct drbd_request *req2 = + container_of(i, struct drbd_request, i); + + /* only ALERT on first iteration, + * we may be woken up early... */ + if (first) + dev_alert(DEV, "%s[%u] Concurrent local write detected!" + " new: %llus +%u; pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)req2->i.sector, req2->i.size); + if (req2->rq_state & RQ_NET_PENDING) + ++have_unacked; + ++have_conflict; } -#undef OVERLAPS if (!have_conflict) break; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 1af11a198b5..593576fcf64 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -135,7 +135,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, struct drbd_request *req) { const unsigned long s = req->rq_state; - struct drbd_request *i; struct drbd_epoch_entry *e; struct hlist_node *n; struct hlist_head *slot; @@ -157,19 +156,21 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { const sector_t sector = req->i.sector; const int size = req->i.size; + struct drbd_interval *i; /* ASSERT: * there must be no conflicting requests, since * they must have been failed on the spot */ -#define OVERLAPS overlaps(sector, size, i->i.sector, i->i.size) - slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, collision) { - if (OVERLAPS) { - dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " - "other: %p %llus +%u\n", - req, (unsigned long long)sector, size, - i, (unsigned long long)i->i.sector, i->i.size); - } + + i = drbd_find_overlap(&mdev->write_requests, sector, size); 
+ if (i) { + struct drbd_request *req2 = + container_of(i, struct drbd_request, i); + + dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " + "other: %p %llus +%u\n", + req, (unsigned long long)sector, size, + i, (unsigned long long)req2->i.sector, req2->i.size); } /* maybe "wake" those conflicting epoch entries @@ -184,7 +185,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * * anyways, if we found one, * we just have to do a wake_up. */ -#undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) slot = ee_hash_slot(mdev, req->i.sector); hlist_for_each_entry(e, n, slot, collision) { @@ -260,9 +260,11 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->collision)) + if (!hlist_unhashed(&req->collision)) { hlist_del(&req->collision); - else + if (!drbd_interval_empty(&req->i)) + drbd_remove_interval(&mdev->write_requests, &req->i); + } else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); /* for writes we need to do some extra housekeeping */ @@ -324,7 +326,7 @@ static int _req_conflicts(struct drbd_request *req) struct drbd_conf *mdev = req->mdev; const sector_t sector = req->i.sector; const int size = req->i.size; - struct drbd_request *i; + struct drbd_interval *i; struct drbd_epoch_entry *e; struct hlist_node *n; struct hlist_head *slot; @@ -339,24 +341,23 @@ static int _req_conflicts(struct drbd_request *req) goto out_no_conflict; BUG_ON(mdev->tl_hash == NULL); -#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) - slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, collision) { - if (OVERLAPS) { - dev_alert(DEV, "%s[%u] Concurrent local write detected! 
" - "[DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->i.sector, i->i.size); - goto out_conflict; - } + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (i) { + struct drbd_request *req2 = + container_of(i, struct drbd_request, i); + + dev_alert(DEV, "%s[%u] Concurrent local write detected! " + "[DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)req2->i.sector, req2->i.size); + goto out_conflict; } if (mdev->ee_hash_s) { /* now, check for overlapping requests with remote origin */ BUG_ON(mdev->ee_hash == NULL); -#undef OVERLAPS #define OVERLAPS overlaps(e->sector, e->size, sector, size) slot = ee_hash_slot(mdev, sector); hlist_for_each_entry(e, n, slot, collision) { @@ -509,6 +510,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); /* corresponding hlist_del is in _req_may_be_done() */ + drbd_insert_interval(&mdev->write_requests, &req->i); /* NOTE * In case the req ended up on the transfer log before being diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 2520186c4c2..6f11624cce3 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -275,6 +275,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; INIT_HLIST_NODE(&req->collision); + drbd_clear_interval(&req->i); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); } From dac1389ccc273b5486f2931c64c8e1672f233727 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 21 Jan 2011 17:18:39 +0100 Subject: [PATCH 016/609] drbd: Add read_requests tree We do not do collision detection for read requests, but we still need to look up the request objects when we receive 
a package over the network. Using the same data structure for read and write requests results in simpler code once the tl_hash and app_reads_hash tables are removed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++- drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_req.c | 13 ++++++++++--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 058371318da..46a4332d344 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1019,7 +1019,8 @@ struct drbd_conf { struct hlist_head *tl_hash; unsigned int tl_hash_s; - /* Interval tree of pending local write requests */ + /* Interval tree of pending local requests */ + struct rb_root read_requests; struct rb_root write_requests; /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4d85838f53e..c0ea5baa9a1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3473,6 +3473,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* no need to lock access, we are still initializing this minor device. 
*/ if (!tl_init(mdev)) goto out_no_tl; + mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 593576fcf64..d2a78c4ee91 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -260,10 +260,15 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->collision)) { + if (!drbd_interval_empty(&req->i)) { + struct rb_root *root; + hlist_del(&req->collision); - if (!drbd_interval_empty(&req->i)) - drbd_remove_interval(&mdev->write_requests, &req->i); + if (rw == WRITE) + root = &mdev->write_requests; + else + root = &mdev->read_requests; + drbd_remove_interval(root, &req->i); } else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); @@ -332,6 +337,7 @@ static int _req_conflicts(struct drbd_request *req) struct hlist_head *slot; D_ASSERT(hlist_unhashed(&req->collision)); + D_ASSERT(drbd_interval_empty(&req->i)); if (!get_net_conf(mdev)) return 0; @@ -493,6 +499,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ hlist_add_head(&req->collision, ar_hash_slot(mdev, req->i.sector)); + drbd_insert_interval(&mdev->read_requests, &req->i); set_bit(UNPLUG_REMOTE, &mdev->flags); From bc9c5c41181a84ad243639c79a10f621a97af44b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 21 Jan 2011 18:00:55 +0100 Subject: [PATCH 017/609] drbd: Use the read and write request trees for request lookups Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.h | 3 +- drivers/block/drbd/drbd_receiver.c | 44 +++++++++++------------------- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git 
a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index bf8dcf7bab0..a847b4a07b2 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -22,8 +22,7 @@ static inline bool drbd_interval_empty(struct drbd_interval *i) } bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); -struct drbd_interval *drbd_find_interval(struct rb_root *, sector_t, - struct drbd_interval *); +bool drbd_contains_interval(struct rb_root *, sector_t, struct drbd_interval *); void drbd_remove_interval(struct rb_root *, struct drbd_interval *); struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, unsigned int); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6b072584250..b148398b5aa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1470,27 +1470,15 @@ fail: } static struct drbd_request * -find_request(struct drbd_conf *mdev, - struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - u64 id, sector_t sector, bool missing_ok, const char *func) +find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, + sector_t sector, bool missing_ok, const char *func) { - struct hlist_head *slot = hash_slot(mdev, sector); - struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req != (unsigned long)id) - continue; - if (req->i.sector != sector) { - dev_err(DEV, "%s: found request %lu but it has " - "wrong sector (%llus versus %llus)\n", - func, (unsigned long)req, - (unsigned long long)req->i.sector, - (unsigned long long)sector); - return NULL; - } + /* Request object according to our peer */ + req = (struct drbd_request *)(unsigned long)id; + if (drbd_contains_interval(root, sector, &req->i)) return req; - } if (!missing_ok) { dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func, (unsigned long)id, (unsigned long long)sector); @@ -1508,7 
+1496,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, ar_hash_slot, p->block_id, sector, false, __func__); + req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__); spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) return false; @@ -4245,16 +4233,16 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int validate_req_change_req_state(struct drbd_conf *mdev, - u64 id, sector_t sector, - struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - const char *func, enum drbd_req_event what, bool missing_ok) +static int +validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, + struct rb_root *root, const char *func, + enum drbd_req_event what, bool missing_ok) { struct drbd_request *req; struct bio_and_error m; spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, hash_slot, id, sector, missing_ok, func); + req = find_request(mdev, root, id, sector, missing_ok, func); if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); return false; @@ -4304,8 +4292,8 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - tl_hash_slot, __func__, what, - false); + &mdev->write_requests, __func__, + what, false); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4326,7 +4314,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) } found = validate_req_change_req_state(mdev, p->block_id, sector, - tl_hash_slot, __func__, + &mdev->write_requests, __func__, neg_acked, missing_ok); if (!found) { /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 
@@ -4351,8 +4339,8 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - ar_hash_slot, __func__, neg_acked, - false); + &mdev->read_requests, __func__, + neg_acked, false); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) From 010f6e678ffddbf3134863038c5b2f6509f1eed3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 14 Jan 2011 20:59:35 +0100 Subject: [PATCH 018/609] drbd: Put sector and size in struct drbd_epoch_entry into struct drbd_interval Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 +-- drivers/block/drbd/drbd_main.c | 14 +++++----- drivers/block/drbd/drbd_nl.c | 6 ++--- drivers/block/drbd/drbd_receiver.c | 28 ++++++++++---------- drivers/block/drbd/drbd_req.c | 6 ++--- drivers/block/drbd/drbd_worker.c | 42 +++++++++++++++--------------- 6 files changed, 49 insertions(+), 50 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 46a4332d344..fa722a986e0 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -764,10 +764,9 @@ struct drbd_epoch_entry { struct drbd_conf *mdev; struct page *pages; atomic_t pending_bios; - unsigned int size; + struct drbd_interval i; /* see comments on ee flag bits below */ unsigned long flags; - sector_t sector; union { u64 block_id; struct digest_info *digest; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c0ea5baa9a1..003313711ef 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2464,8 +2464,8 @@ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, struct drbd_epoch_entry *e) { return _drbd_send_ack(mdev, cmd, - cpu_to_be64(e->sector), - cpu_to_be32(e->size), + cpu_to_be64(e->i.sector), + cpu_to_be32(e->i.size), e->block_id); } @@ -2671,7 +2671,7 @@ 
static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { struct page *page = e->pages; - unsigned len = e->size; + unsigned len = e->i.size; /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); @@ -2796,19 +2796,19 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - if (e->size <= DRBD_MAX_SIZE_H80_PACKET) { + if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(cmd); p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); } else { p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(cmd); p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); } - p.sector = cpu_to_be64(e->sector); + p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; /* p.seq_num = 0; No sequence numbers here.. 
*/ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 515bcd948a4..98c0e9b871e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2506,7 +2506,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, if (!cn_reply) { dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", - (unsigned long long)e->sector, e->size); + (unsigned long long)e->i.sector, e->i.size); return; } @@ -2516,11 +2516,11 @@ void drbd_bcast_ee(struct drbd_conf *mdev, tl = tl_add_str(tl, T_dump_ee_reason, reason); tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, &e->sector); + tl = tl_add_int(tl, T_ee_sector, &e->i.sector); tl = tl_add_int(tl, T_ee_block_id, &e->block_id); /* dump the first 32k */ - len = min_t(unsigned, e->size, 32 << 10); + len = min_t(unsigned, e->i.size, 32 << 10); put_unaligned(T_ee_data, tl++); put_unaligned(len, tl++); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b148398b5aa..42c0ffabad7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -338,9 +338,9 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->mdev = mdev; e->pages = page; atomic_set(&e->pending_bios, 0); - e->size = data_size; + e->i.size = data_size; e->flags = 0; - e->sector = sector; + e->i.sector = sector; /* * The block_id is opaque to the receiver. It is not endianness * converted, and sent back to the sender unchanged. 
@@ -1091,8 +1091,8 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, struct bio *bios = NULL; struct bio *bio; struct page *page = e->pages; - sector_t sector = e->sector; - unsigned ds = e->size; + sector_t sector = e->i.sector; + unsigned ds = e->i.size; unsigned n_bios = 0; unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; @@ -1107,7 +1107,7 @@ next_bio: dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); goto fail; } - /* > e->sector, unless this is the first bio */ + /* > e->i.sector, unless this is the first bio */ bio->bi_sector = sector; bio->bi_bdev = mdev->ldev->backing_bdev; bio->bi_rw = rw; @@ -1414,17 +1414,17 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - sector_t sector = e->sector; + sector_t sector = e->i.sector; int ok; D_ASSERT(hlist_unhashed(&e->collision)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(mdev, sector, e->size); + drbd_set_in_sync(mdev, sector, e->i.size); ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); } else { /* Record failure to sync */ - drbd_rs_failed_io(mdev, sector, e->size); + drbd_rs_failed_io(mdev, sector, e->i.size); ok = drbd_send_ack(mdev, P_NEG_ACK, e); } @@ -1549,7 +1549,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - sector_t sector = e->sector; + sector_t sector = e->i.sector; int ok = 1, pcmd; if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { @@ -1560,7 +1560,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) P_RS_WRITE_ACK : P_WRITE_ACK; ok &= drbd_send_ack(mdev, pcmd, e); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(mdev, sector, e->size); + 
drbd_set_in_sync(mdev, sector, e->i.size); } else { ok = drbd_send_ack(mdev, P_NEG_ACK, e); /* we expect it to be marked out of sync anyways... @@ -1718,7 +1718,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } else { /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ - const int size = e->size; + const int size = e->i.size; const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); DEFINE_WAIT(wait); int first; @@ -1861,10 +1861,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (mdev->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(mdev, e->sector, e->size); + drbd_set_out_of_sync(mdev, e->i.sector, e->i.size); e->flags |= EE_CALL_AL_COMPLETE_IO; e->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(mdev, e->sector); + drbd_al_begin_io(mdev, e->i.sector); } if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) @@ -1877,7 +1877,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) - drbd_al_complete_io(mdev, e->sector); + drbd_al_complete_io(mdev, e->i.sector); out_interrupted: drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d2a78c4ee91..5bf93a7c91b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -185,7 +185,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * * anyways, if we found one, * we just have to do a wake_up. 
*/ -#define OVERLAPS overlaps(sector, size, e->sector, e->size) +#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size) slot = ee_hash_slot(mdev, req->i.sector); hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { @@ -364,7 +364,7 @@ static int _req_conflicts(struct drbd_request *req) if (mdev->ee_hash_s) { /* now, check for overlapping requests with remote origin */ BUG_ON(mdev->ee_hash == NULL); -#define OVERLAPS overlaps(e->sector, e->size, sector, size) +#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size) slot = ee_hash_slot(mdev, sector); hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { @@ -373,7 +373,7 @@ static int _req_conflicts(struct drbd_request *req) "pending: %llus +%u\n", current->comm, current->pid, (unsigned long long)sector, size, - (unsigned long long)e->sector, e->size); + (unsigned long long)e->i.sector, e->i.size); goto out_conflict; } } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a1eff6e9c0e..2b83aaf02c3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -86,7 +86,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) struct drbd_conf *mdev = e->mdev; spin_lock_irqsave(&mdev->req_lock, flags); - mdev->read_cnt += e->size >> 9; + mdev->read_cnt += e->i.size >> 9; list_del(&e->w.list); if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); @@ -113,12 +113,12 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo * we may no longer access it, * it may be freed/reused already! 
* (as soon as we release the req_lock) */ - e_sector = e->sector; + e_sector = e->i.sector; do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; block_id = e->block_id; spin_lock_irqsave(&mdev->req_lock, flags); - mdev->writ_cnt += e->size >> 9; + mdev->writ_cnt += e->i.size >> 9; list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); @@ -159,12 +159,12 @@ void drbd_endio_sec(struct bio *bio, int error) if (error && __ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: error=%d s=%llus\n", is_write ? "write" : "read", error, - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); if (!error && !uptodate) { if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", is_write ? "write" : "read", - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); /* strange behavior of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! */ @@ -265,7 +265,7 @@ void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_e page = tmp; } /* and now the last, possibly only partially used page */ - len = e->size & (PAGE_SIZE - 1); + len = e->i.size & (PAGE_SIZE - 1); sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); crypto_hash_update(&desc, &sg, sg.length); crypto_hash_final(&desc, digest); @@ -308,8 +308,8 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) digest_size = crypto_hash_digestsize(mdev->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - sector_t sector = e->sector; - unsigned int size = e->size; + sector_t sector = e->i.sector; + unsigned int size = e->i.size; drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); /* Free e and pages before send. 
* In case we block on congestion, we could otherwise run into @@ -901,7 +901,7 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent { if (drbd_ee_has_active_page(e)) { /* This might happen if sendpage() has not finished */ - int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT; + int i = (e->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; atomic_add(i, &mdev->pp_in_use_by_net); atomic_sub(i, &mdev->pp_in_use); spin_lock_irq(&mdev->req_lock); @@ -934,7 +934,7 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. sector=%llus.\n", - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); } @@ -966,7 +966,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_rs_complete_io(mdev, e->sector); + drbd_rs_complete_io(mdev, e->i.sector); put_ldev(mdev); } @@ -985,12 +985,12 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegRSDReply. 
sector %llus.\n", - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); /* update resync data with failure */ - drbd_rs_failed_io(mdev, e->sector, e->size); + drbd_rs_failed_io(mdev, e->i.sector, e->i.size); } dec_unacked(mdev); @@ -1017,7 +1017,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, e->sector); + drbd_rs_complete_io(mdev, e->i.sector); put_ldev(mdev); } @@ -1039,9 +1039,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } if (eq) { - drbd_set_in_sync(mdev, e->sector, e->size); + drbd_set_in_sync(mdev, e->i.sector, e->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ - mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT; + mdev->rs_same_csum += e->i.size >> BM_BLOCK_SHIFT; ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); } else { inc_rs_pending(mdev); @@ -1068,8 +1068,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); - sector_t sector = e->sector; - unsigned int size = e->size; + sector_t sector = e->i.sector; + unsigned int size = e->i.size; int digest_size; void *digest; int ok = 1; @@ -1127,8 +1127,8 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); struct digest_info *di; void *digest; - sector_t sector = e->sector; - unsigned int size = e->size; + sector_t sector = e->i.sector; + unsigned int size = e->i.size; int digest_size; int ok, eq = 0; @@ -1141,7 +1141,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all * the resync lru has been cleaned up already */ if (get_ldev(mdev)) { - 
drbd_rs_complete_io(mdev, e->sector); + drbd_rs_complete_io(mdev, e->i.sector); put_ldev(mdev); } From 8b946255f8467e30f98988be426d8c1604d63ffd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 15:23:07 +0100 Subject: [PATCH 019/609] drbd: Use interval tree for overlapping epoch entry detection Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++ drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 15 ++++++++++ drivers/block/drbd/drbd_req.c | 44 +++++++++++------------------- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fa722a986e0..751a4d4ff07 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1080,6 +1080,9 @@ struct drbd_conf { struct hlist_head *ee_hash; /* is proteced by req_lock! */ unsigned int ee_hash_s; + /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */ + struct rb_root epoch_entries; + /* this one is protected by ee_lock, single thread */ struct drbd_epoch_entry *last_write_w_barrier; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 003313711ef..18f27afab81 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3475,6 +3475,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) goto out_no_tl; mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; + mdev->epoch_entries = RB_ROOT; mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); if (!mdev->app_reads_hash) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 42c0ffabad7..a0fbbfc77d8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -334,6 +334,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, goto fail; INIT_HLIST_NODE(&e->collision); + drbd_clear_interval(&e->i); e->epoch = 
NULL; e->mdev = mdev; e->pages = page; @@ -361,6 +362,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); D_ASSERT(hlist_unhashed(&e->collision)); + D_ASSERT(drbd_interval_empty(&e->i)); mempool_free(e, drbd_ee_mempool); } @@ -1418,6 +1420,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u int ok; D_ASSERT(hlist_unhashed(&e->collision)); + D_ASSERT(drbd_interval_empty(&e->i)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, e->i.size); @@ -1574,9 +1577,13 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) spin_lock_irq(&mdev->req_lock); D_ASSERT(!hlist_unhashed(&e->collision)); hlist_del_init(&e->collision); + D_ASSERT(!drbd_interval_empty(&e->i)); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); } else { D_ASSERT(hlist_unhashed(&e->collision)); + D_ASSERT(drbd_interval_empty(&e->i)); } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? 
EV_CLEANUP : 0)); @@ -1595,6 +1602,9 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u spin_lock_irq(&mdev->req_lock); D_ASSERT(!hlist_unhashed(&e->collision)); hlist_del_init(&e->collision); + D_ASSERT(!drbd_interval_empty(&e->i)); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); dec_unacked(mdev); @@ -1767,6 +1777,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); + drbd_insert_interval(&mdev->epoch_entries, &e->i); first = 1; for (;;) { @@ -1817,6 +1828,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (signal_pending(current)) { hlist_del_init(&e->collision); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); @@ -1875,6 +1888,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); hlist_del_init(&e->collision); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->i.sector); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5bf93a7c91b..b81ce82eb15 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -135,9 +135,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, struct drbd_request *req) { const unsigned long s = req->rq_state; - struct drbd_epoch_entry *e; - struct hlist_node *n; - struct hlist_head *slot; /* Before we can signal completion to the upper layers, * we may need to close the current epoch. 
@@ -185,16 +182,10 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * * anyways, if we found one, * we just have to do a wake_up. */ -#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size) - slot = ee_hash_slot(mdev, req->i.sector); - hlist_for_each_entry(e, n, slot, collision) { - if (OVERLAPS) { - wake_up(&mdev->misc_wait); - break; - } - } + i = drbd_find_overlap(&mdev->epoch_entries, sector, size); + if (i) + wake_up(&mdev->misc_wait); } -#undef OVERLAPS } void complete_master_bio(struct drbd_conf *mdev, @@ -332,9 +323,6 @@ static int _req_conflicts(struct drbd_request *req) const sector_t sector = req->i.sector; const int size = req->i.size; struct drbd_interval *i; - struct drbd_epoch_entry *e; - struct hlist_node *n; - struct hlist_head *slot; D_ASSERT(hlist_unhashed(&req->collision)); D_ASSERT(drbd_interval_empty(&req->i)); @@ -364,21 +352,21 @@ static int _req_conflicts(struct drbd_request *req) if (mdev->ee_hash_s) { /* now, check for overlapping requests with remote origin */ BUG_ON(mdev->ee_hash == NULL); -#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size) - slot = ee_hash_slot(mdev, sector); - hlist_for_each_entry(e, n, slot, collision) { - if (OVERLAPS) { - dev_alert(DEV, "%s[%u] Concurrent remote write detected!" - " [DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)e->i.sector, e->i.size); - goto out_conflict; - } + + i = drbd_find_overlap(&mdev->epoch_entries, sector, size); + if (i) { + struct drbd_epoch_entry *e = + container_of(i, struct drbd_epoch_entry, i); + + dev_alert(DEV, "%s[%u] Concurrent remote write detected!" + " [DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)e->i.sector, e->i.size); + goto out_conflict; } } -#undef OVERLAPS out_no_conflict: /* this is like it should be, and what we expected. 
From bb3bfe96144a4535d47ccfea444bc1ef8e02f4e3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 21 Jan 2011 15:59:23 +0100 Subject: [PATCH 020/609] drbd: Remove the unused hash tables Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ------- drivers/block/drbd/drbd_main.c | 57 ------------------------------ drivers/block/drbd/drbd_nl.c | 36 +------------------ drivers/block/drbd/drbd_receiver.c | 27 ++++---------- drivers/block/drbd/drbd_req.c | 26 ++++---------- drivers/block/drbd/drbd_req.h | 27 -------------- drivers/block/drbd/drbd_worker.c | 11 +++--- 7 files changed, 20 insertions(+), 177 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 751a4d4ff07..5874357b0f9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -694,7 +694,6 @@ struct drbd_request { * see drbd_endio_pri(). */ struct bio *private_bio; - struct hlist_node collision; struct drbd_interval i; unsigned int epoch; /* barrier_nr */ @@ -759,7 +758,6 @@ struct digest_info { struct drbd_epoch_entry { struct drbd_work w; - struct hlist_node collision; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; struct page *pages; @@ -1015,8 +1013,6 @@ struct drbd_conf { struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; - struct hlist_head *tl_hash; - unsigned int tl_hash_s; /* Interval tree of pending local requests */ struct rb_root read_requests; @@ -1077,8 +1073,6 @@ struct drbd_conf { struct list_head done_ee; /* send ack */ struct list_head read_ee; /* IO in progress (any read) */ struct list_head net_ee; /* zero-copy network send in progress */ - struct hlist_head *ee_hash; /* is proteced by req_lock! 
*/ - unsigned int ee_hash_s; /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */ struct rb_root epoch_entries; @@ -1087,7 +1081,6 @@ struct drbd_conf { struct drbd_epoch_entry *last_write_w_barrier; int next_barrier_nr; - struct hlist_head *app_reads_hash; /* is proteced by req_lock */ struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ @@ -1428,18 +1421,12 @@ struct bm_extent { #endif #endif -/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables. - * With a value of 8 all IO in one 128K block make it to the same slot of the - * hash table. */ #define HT_SHIFT 8 #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ -/* Number of elements in the app_reads_hash */ -#define APP_R_HSIZE 15 - extern int drbd_bm_init(struct drbd_conf *mdev); extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); extern void drbd_bm_cleanup(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 18f27afab81..878f7d4fc88 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -209,9 +209,6 @@ static int tl_init(struct drbd_conf *mdev) mdev->newest_tle = b; INIT_LIST_HEAD(&mdev->out_of_sequence_requests); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - return 1; } @@ -223,39 +220,6 @@ static void tl_cleanup(struct drbd_conf *mdev) mdev->oldest_tle = NULL; kfree(mdev->unused_spare_tle); mdev->unused_spare_tle = NULL; - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; -} - -static void drbd_free_tl_hash(struct drbd_conf *mdev) -{ - struct hlist_head *h; - - spin_lock_irq(&mdev->req_lock); - - if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) { - 
spin_unlock_irq(&mdev->req_lock); - return; - } - /* paranoia code */ - for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", - (int)(h - mdev->ee_hash), h->first); - kfree(mdev->ee_hash); - mdev->ee_hash = NULL; - mdev->ee_hash_s = 0; - - /* paranoia code */ - for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", - (int)(h - mdev->tl_hash), h->first); - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - spin_unlock_irq(&mdev->req_lock); } /** @@ -475,8 +439,6 @@ void tl_clear(struct drbd_conf *mdev) /* ensure bit indicating barrier is required is clear */ clear_bit(CREATE_BARRIER, &mdev->flags); - memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); - spin_unlock_irq(&mdev->req_lock); } @@ -1633,10 +1595,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, put_ldev(mdev); } - /* free tl_hash if we Got thawed and are C_STANDALONE */ - if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) - drbd_free_tl_hash(mdev); - /* Upon network connection, we need to start the receiver */ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) drbd_thread_start(&mdev->receiver); @@ -3317,13 +3275,6 @@ static void drbd_delete_device(unsigned int minor) drbd_release_ee_lists(mdev); - /* should be freed on disconnect? 
*/ - kfree(mdev->ee_hash); - /* - mdev->ee_hash_s = 0; - mdev->ee_hash = NULL; - */ - lc_destroy(mdev->act_log); lc_destroy(mdev->resync); @@ -3477,10 +3428,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) mdev->write_requests = RB_ROOT; mdev->epoch_entries = RB_ROOT; - mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); - if (!mdev->app_reads_hash) - goto out_no_app_reads; - mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); if (!mdev->current_epoch) goto out_no_epoch; @@ -3493,8 +3440,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* out_whatever_else: kfree(mdev->current_epoch); */ out_no_epoch: - kfree(mdev->app_reads_hash); -out_no_app_reads: tl_cleanup(mdev); out_no_tl: drbd_bm_cleanup(mdev); @@ -3516,7 +3461,6 @@ out_no_cpumask: void drbd_free_mdev(struct drbd_conf *mdev) { kfree(mdev->current_epoch); - kfree(mdev->app_reads_hash); tl_cleanup(mdev); if (mdev->bitmap) /* should no longer be there. */ drbd_bm_cleanup(mdev); @@ -3524,7 +3468,6 @@ void drbd_free_mdev(struct drbd_conf *mdev) put_disk(mdev->vdisk); blk_cleanup_queue(mdev->rq_queue); free_cpumask_var(mdev->cpu_mask); - drbd_free_tl_hash(mdev); kfree(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 98c0e9b871e..5b8ebbef95d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1353,14 +1353,12 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - int i, ns; + int i; enum drbd_ret_code retcode; struct net_conf *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - struct hlist_head *new_tl_hash = NULL; - struct hlist_head *new_ee_hash = NULL; struct drbd_conf *odev; char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; @@ -1494,24 +1492,6 @@ 
static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - ns = new_conf->max_epoch_size/8; - if (mdev->tl_hash_s != ns) { - new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); - if (!new_tl_hash) { - retcode = ERR_NOMEM; - goto fail; - } - } - - ns = new_conf->max_buffers/8; - if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) { - new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); - if (!new_ee_hash) { - retcode = ERR_NOMEM; - goto fail; - } - } - ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; if (integrity_w_tfm) { @@ -1552,18 +1532,6 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->send_cnt = 0; mdev->recv_cnt = 0; - if (new_tl_hash) { - kfree(mdev->tl_hash); - mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8; - mdev->tl_hash = new_tl_hash; - } - - if (new_ee_hash) { - kfree(mdev->ee_hash); - mdev->ee_hash_s = mdev->net_conf->max_buffers/8; - mdev->ee_hash = new_ee_hash; - } - crypto_free_hash(mdev->cram_hmac_tfm); mdev->cram_hmac_tfm = tfm; @@ -1594,8 +1562,6 @@ fail: crypto_free_hash(tfm); crypto_free_hash(integrity_w_tfm); crypto_free_hash(integrity_r_tfm); - kfree(new_tl_hash); - kfree(new_ee_hash); kfree(new_conf); reply->ret_code = retcode; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a0fbbfc77d8..566317bb74e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -333,7 +333,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, if (!page) goto fail; - INIT_HLIST_NODE(&e->collision); drbd_clear_interval(&e->i); e->epoch = NULL; e->mdev = mdev; @@ -361,7 +360,6 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i kfree(e->digest); drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(hlist_unhashed(&e->collision)); D_ASSERT(drbd_interval_empty(&e->i)); mempool_free(e, drbd_ee_mempool); } @@ -1419,7 
+1417,6 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u sector_t sector = e->i.sector; int ok; - D_ASSERT(hlist_unhashed(&e->collision)); D_ASSERT(drbd_interval_empty(&e->i)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { @@ -1575,16 +1572,12 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->collision)); - hlist_del_init(&e->collision); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); - } else { - D_ASSERT(hlist_unhashed(&e->collision)); + } else D_ASSERT(drbd_interval_empty(&e->i)); - } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); @@ -1600,8 +1593,6 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->collision)); - hlist_del_init(&e->collision); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); @@ -1734,23 +1725,20 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int first; D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - BUG_ON(mdev->ee_hash == NULL); - BUG_ON(mdev->tl_hash == NULL); /* conflict detection and handling: * 1. wait on the sequence number, * in case this data packet overtook ACK packets. - * 2. check our hash tables for conflicting requests. - * we only need to walk the tl_hash, since an ee can not - * have a conflict with an other ee: on the submitting - * node, the corresponding req had already been conflicting, - * and a conflicting req is never sent. + * 2. 
check our interval trees for conflicting requests: + * we only need to check the write_requests tree; the + * epoch_entries tree cannot contain any overlaps because + * they were already eliminated on the submitting node. * * Note: for two_primaries, we are protocol C, * so there cannot be any request that is DONE * but still on the transfer log. * - * unconditionally add to the ee_hash. + * unconditionally add to the epoch_entries tree. * * if no conflicting request is found: * submit. @@ -1776,7 +1764,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); - hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); drbd_insert_interval(&mdev->epoch_entries, &e->i); first = 1; @@ -1827,7 +1814,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (signal_pending(current)) { - hlist_del_init(&e->collision); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); @@ -1887,7 +1873,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); - hlist_del_init(&e->collision); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b81ce82eb15..8541b16de08 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -148,9 +148,9 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, queue_barrier(mdev); /* we need to do the conflict detection stuff, - * if we have the ee_hash (two_primaries) and - * this has been on the network */ - if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { + * if the epoch_entries tree is non-empty and + * this request has completed on the network */ + if ((s & RQ_NET_DONE) && !RB_EMPTY_ROOT(&mdev->epoch_entries)) { const 
sector_t sector = req->i.sector; const int size = req->i.size; struct drbd_interval *i; @@ -254,7 +254,6 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) if (!drbd_interval_empty(&req->i)) { struct rb_root *root; - hlist_del(&req->collision); if (rw == WRITE) root = &mdev->write_requests; else @@ -313,9 +312,7 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e * conflicting requests with local origin, and why we have to do so regardless * of whether we allowed multiple primaries. * - * BTW, in case we only have one primary, the ee_hash is empty anyways, and the - * second hlist_for_each_entry becomes a noop. This is even simpler than to - * grab a reference on the net_conf, and check for the two_primaries flag... + * In case we only have one primary, the epoch_entries tree is empty. */ static int _req_conflicts(struct drbd_request *req) { @@ -324,17 +321,11 @@ static int _req_conflicts(struct drbd_request *req) const int size = req->i.size; struct drbd_interval *i; - D_ASSERT(hlist_unhashed(&req->collision)); D_ASSERT(drbd_interval_empty(&req->i)); if (!get_net_conf(mdev)) return 0; - /* BUG_ON */ - ERR_IF (mdev->tl_hash_s == 0) - goto out_no_conflict; - BUG_ON(mdev->tl_hash == NULL); - i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { struct drbd_request *req2 = @@ -349,10 +340,8 @@ static int _req_conflicts(struct drbd_request *req) goto out_conflict; } - if (mdev->ee_hash_s) { - /* now, check for overlapping requests with remote origin */ - BUG_ON(mdev->ee_hash == NULL); - + if (!RB_EMPTY_ROOT(&mdev->epoch_entries)) { + /* check for overlapping requests with remote origin */ i = drbd_find_overlap(&mdev->epoch_entries, sector, size); if (i) { struct drbd_epoch_entry *e = @@ -368,7 +357,6 @@ static int _req_conflicts(struct drbd_request *req) } } -out_no_conflict: /* this is like it should be, and what we expected. * our users do behave after all... 
*/ put_net_conf(mdev); @@ -486,7 +474,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->collision, ar_hash_slot(mdev, req->i.sector)); drbd_insert_interval(&mdev->read_requests, &req->i); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -503,7 +490,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); /* corresponding hlist_del is in _req_may_be_done() */ drbd_insert_interval(&mdev->write_requests, &req->i); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 6f11624cce3..ee591749c4d 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -222,32 +222,6 @@ enum drbd_req_state_bits { #define MR_READ_SHIFT 1 #define MR_READ (1 << MR_READ_SHIFT) -/* epoch entries */ -static inline -struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - BUG_ON(mdev->ee_hash_s == 0); - return mdev->ee_hash + - ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); -} - -/* transfer log (drbd_request objects) */ -static inline -struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - BUG_ON(mdev->tl_hash_s == 0); - return mdev->tl_hash + - ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); -} - -/* application reads (drbd_request objects) */ -static inline -struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - return mdev->app_reads_hash - + ((unsigned int)(sector) % APP_R_HSIZE); -} - static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) { struct bio *bio; @@ -274,7 +248,6 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->epoch = 0; req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; - 
INIT_HLIST_NODE(&req->collision); drbd_clear_interval(&req->i); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2b83aaf02c3..1ddf6b61b20 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -122,10 +122,13 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - /* No hlist_del_init(&e->collision) here, we did not send the Ack yet, - * neither did we wake possibly waiting conflicting requests. - * done from "drbd_process_done_ee" within the appropriate w.cb - * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ + /* + * Do not remove from the epoch_entries tree here: we did not send the + * Ack yet and did not wake possibly waiting conflicting requests. + * Removed from the tree from "drbd_process_done_ee" within the + * appropriate w.cb (e_end_block/e_end_resync_block) or from + * _drbd_clear_done_ee. + */ do_wake = list_empty(block_id == ID_SYNCER ? 
&mdev->sync_ee : &mdev->active_ee); From 8554df1c6d3bb7686b39ed775772f507fa857c19 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jan 2011 15:37:43 +0100 Subject: [PATCH 021/609] drbd: Convert all constants in enum drbd_req_event to upper case Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++-- drivers/block/drbd/drbd_main.c | 28 +++++----- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 18 +++---- drivers/block/drbd/drbd_req.c | 84 +++++++++++++++--------------- drivers/block/drbd/drbd_req.h | 58 ++++++++++----------- drivers/block/drbd/drbd_worker.c | 22 ++++---- 7 files changed, 111 insertions(+), 111 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 5874357b0f9..6099c667b63 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2031,21 +2031,21 @@ static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) * or implicit barrier packets as necessary. * increased: * w_send_barrier - * _req_mod(req, queue_for_net_write or queue_for_net_read); + * _req_mod(req, QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ); * it is much easier and equally valid to count what we queue for the * worker, even before it actually was queued or send. * (drbd_make_request_common; recovery path on read io-error) * decreased: * got_BarrierAck (respective tl_clear, tl_clear_barrier) - * _req_mod(req, data_received) + * _req_mod(req, DATA_RECEIVED) * [from receive_DataReply] - * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) + * _req_mod(req, WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED) * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] * for some reason it is NOT decreased in got_NegAck, * but in the resulting cleanup code from report_params. * we should try to remember the reason for that... 
- * _req_mod(req, send_failed or send_canceled) - * _req_mod(req, connection_lost_while_pending) + * _req_mod(req, SEND_FAILED or SEND_CANCELED) + * _req_mod(req, CONNECTION_LOST_WHILE_PENDING) * [from tl_clear_barrier] */ static inline void inc_ap_pending(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 878f7d4fc88..c5bb8714334 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -290,7 +290,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, /* Clean up list of requests processed during current epoch */ list_for_each_safe(le, tle, &b->requests) { r = list_entry(le, struct drbd_request, tl_requests); - _req_mod(r, barrier_acked); + _req_mod(r, BARRIER_ACKED); } /* There could be requests on the list waiting for completion of the write to the local disk. To avoid corruptions of @@ -300,10 +300,10 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, the write acks - which would be a bug and violating write ordering. To not deadlock in case we lose connection while such requests are still pending, we need some way to find them for the - _req_mode(connection_lost_while_pending). + _req_mode(CONNECTION_LOST_WHILE_PENDING). These have been list_move'd to the out_of_sequence_requests list in - _req_mod(, barrier_acked) above. + _req_mod(, BARRIER_ACKED) above. */ list_del_init(&b->requests); @@ -336,8 +336,8 @@ bail: * @mdev: DRBD device. * @what: The action/event to perform with all request objects * - * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io, - * restart_frozen_disk_io. + * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, + * RESTART_FROZEN_DISK_IO. 
*/ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) { @@ -362,7 +362,7 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) tmp = b->next; if (n_writes) { - if (what == resend) { + if (what == RESEND) { b->n_writes = n_writes; if (b->w.cb == NULL) { b->w.cb = w_send_barrier; @@ -423,7 +423,7 @@ void tl_clear(struct drbd_conf *mdev) spin_lock_irq(&mdev->req_lock); - _tl_restart(mdev, connection_lost_while_pending); + _tl_restart(mdev, CONNECTION_LOST_WHILE_PENDING); /* we expect this list to be empty. */ D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); @@ -433,7 +433,7 @@ void tl_clear(struct drbd_conf *mdev) r = list_entry(le, struct drbd_request, tl_requests); /* It would be nice to complete outside of spinlock. * But this is easier for now. */ - _req_mod(r, connection_lost_while_pending); + _req_mod(r, CONNECTION_LOST_WHILE_PENDING); } /* ensure bit indicating barrier is required is clear */ @@ -1321,7 +1321,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { enum drbd_fencing_p fp; - enum drbd_req_event what = nothing; + enum drbd_req_event what = NOTHING; union drbd_state nsm = (union drbd_state){ .i = -1 }; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { @@ -1349,12 +1349,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, nsm.i = -1; if (ns.susp_nod) { if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) - what = resend; + what = RESEND; if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) - what = restart_frozen_disk_io; + what = RESTART_FROZEN_DISK_IO; - if (what != nothing) + if (what != NOTHING) nsm.susp_nod = 0; } @@ -1373,12 +1373,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* case2: The connection was established again: */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { clear_bit(NEW_CUR_UUID, &mdev->flags); - what = resend; + what = RESEND; 
nsm.susp_fen = 0; } } - if (what != nothing) { + if (what != NOTHING) { spin_lock_irq(&mdev->req_lock); _tl_restart(mdev, what); nsm.i &= mdev->state.i; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5b8ebbef95d..1840cbb8a10 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2022,7 +2022,7 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (mdev->state.conn < C_CONNECTED) tl_clear(mdev); if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) - tl_restart(mdev, fail_frozen_disk_io); + tl_restart(mdev, FAIL_FROZEN_DISK_IO); } drbd_resume_io(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 566317bb74e..1762ef0375e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -385,7 +385,7 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) /* * This function is called from _asender only_ - * but see also comments in _req_mod(,barrier_acked) + * but see also comments in _req_mod(,BARRIER_ACKED) * and receive_Barrier. * * Move entries from net_ee to done_ee, if ready. @@ -1507,7 +1507,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi ok = recv_dless_read(mdev, req, sector, data_size); if (ok) - req_mod(req, data_received); + req_mod(req, DATA_RECEIVED); /* else: nothing. handled from drbd_disconnect... * I don't think we may complete this just yet * in case we are "on-disconnect: freeze" */ @@ -3279,7 +3279,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { - /* Do not allow tl_restart(resend) for a rebooted peer. 
We can only allow this + /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this for temporal network outages! */ spin_unlock_irq(&mdev->req_lock); dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); @@ -4272,19 +4272,19 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) switch (be16_to_cpu(h->command)) { case P_RS_WRITE_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = write_acked_by_peer_and_sis; + what = WRITE_ACKED_BY_PEER_AND_SIS; break; case P_WRITE_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = write_acked_by_peer; + what = WRITE_ACKED_BY_PEER; break; case P_RECV_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); - what = recv_acked_by_peer; + what = RECV_ACKED_BY_PEER; break; case P_DISCARD_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = conflict_discarded_by_peer; + what = CONFLICT_DISCARDED_BY_PEER; break; default: D_ASSERT(0); @@ -4315,7 +4315,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) found = validate_req_change_req_state(mdev, p->block_id, sector, &mdev->write_requests, __func__, - neg_acked, missing_ok); + NEG_ACKED, missing_ok); if (!found) { /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 
The master bio might already be completed, therefore the @@ -4340,7 +4340,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) return validate_req_change_req_state(mdev, p->block_id, sector, &mdev->read_requests, __func__, - neg_acked, false); + NEG_ACKED, false); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8541b16de08..b3b1d4edbb0 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -225,10 +225,10 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) return; if (req->master_bio) { - /* this is data_received (remote read) + /* this is DATA_RECEIVED (remote read) * or protocol C P_WRITE_ACK * or protocol B P_RECV_ACK - * or protocol A "handed_over_to_network" (SendAck) + * or protocol A "HANDED_OVER_TO_NETWORK" (SendAck) * or canceled or failed, * or killed from the transfer log due to connection loss. */ @@ -393,11 +393,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* does not happen... 
* initialization done in drbd_req_new - case created: + case CREATED: break; */ - case to_be_send: /* via network */ + case TO_BE_SENT: /* via network */ /* reached via drbd_make_request_common * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); @@ -405,13 +405,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, inc_ap_pending(mdev); break; - case to_be_submitted: /* locally */ + case TO_BE_SUBMITTED: /* locally */ /* reached via drbd_make_request_common */ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); req->rq_state |= RQ_LOCAL_PENDING; break; - case completed_ok: + case COMPLETED_OK: if (bio_data_dir(req->master_bio) == WRITE) mdev->writ_cnt += req->i.size >> 9; else @@ -424,7 +424,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; - case write_completed_with_error: + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -433,7 +433,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; - case read_ahead_completed_with_error: + case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA */ req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -441,7 +441,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; - case read_completed_with_error: + case READ_COMPLETED_WITH_ERROR: drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); req->rq_state |= RQ_LOCAL_COMPLETED; @@ -459,12 +459,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; } - /* _req_mod(req,to_be_send); oops, recursion... */ + /* _req_mod(req,TO_BE_SENT); oops, recursion... 
*/ req->rq_state |= RQ_NET_PENDING; inc_ap_pending(mdev); - /* fall through: _req_mod(req,queue_for_net_read); */ + /* fall through: _req_mod(req,QUEUE_FOR_NET_READ); */ - case queue_for_net_read: + case QUEUE_FOR_NET_READ: /* READ or READA, and * no local disk, * or target area marked as invalid, @@ -486,7 +486,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); break; - case queue_for_net_write: + case QUEUE_FOR_NET_WRITE: /* assert something? */ /* from drbd_make_request_common only */ @@ -533,17 +533,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; - case queue_for_send_oos: + case QUEUE_FOR_SEND_OOS: req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_oos; drbd_queue_work(&mdev->data.work, &req->w); break; - case oos_handed_to_network: + case OOS_HANDED_TO_NETWORK: /* actually the same */ - case send_canceled: + case SEND_CANCELED: /* treat it the same */ - case send_failed: + case SEND_FAILED: /* real cleanup will be done from tl_clear. just update flags * so it is no longer marked as on the worker queue */ req->rq_state &= ~RQ_NET_QUEUED; @@ -552,7 +552,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done_not_susp(req, m); break; - case handed_over_to_network: + case HANDED_OVER_TO_NETWORK: /* assert something? 
*/ if (bio_data_dir(req->master_bio) == WRITE) atomic_add(req->i.size >> 9, &mdev->ap_in_flight); @@ -573,17 +573,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_NET_QUEUED; req->rq_state |= RQ_NET_SENT; /* because _drbd_send_zc_bio could sleep, and may want to - * dereference the bio even after the "write_acked_by_peer" and - * "completed_ok" events came in, once we return from + * dereference the bio even after the "WRITE_ACKED_BY_PEER" and + * "COMPLETED_OK" events came in, once we return from * _drbd_send_zc_bio (drbd_send_dblock), we have to check * whether it is done already, and end it. */ _req_may_be_done_not_susp(req, m); break; - case read_retry_remote_canceled: + case READ_RETRY_REMOTE_CANCELED: req->rq_state &= ~RQ_NET_QUEUED; /* fall through, in case we raced with drbd_disconnect */ - case connection_lost_while_pending: + case CONNECTION_LOST_WHILE_PENDING: /* transfer log cleanup after connection loss */ /* assert something? */ if (req->rq_state & RQ_NET_PENDING) @@ -599,19 +599,19 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case write_acked_by_peer_and_sis: + case WRITE_ACKED_BY_PEER_AND_SIS: req->rq_state |= RQ_NET_SIS; - case conflict_discarded_by_peer: + case CONFLICT_DISCARDED_BY_PEER: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered by the activity log. */ - if (what == conflict_discarded_by_peer) + if (what == CONFLICT_DISCARDED_BY_PEER) dev_alert(DEV, "Got DiscardAck packet %llus +%u!" " DRBD is not a random data generator!\n", (unsigned long long)req->i.sector, req->i.size); req->rq_state |= RQ_NET_DONE; /* fall through */ - case write_acked_by_peer: + case WRITE_ACKED_BY_PEER: /* protocol C; successfully written on peer. * Nothing to do here. 
* We want to keep the tl in place for all protocols, to cater @@ -623,9 +623,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * P_BARRIER_ACK, but that is an unnecessary optimization. */ /* this makes it effectively the same as for: */ - case recv_acked_by_peer: + case RECV_ACKED_BY_PEER: /* protocol B; pretends to be successfully written on peer. - * see also notes above in handed_over_to_network about + * see also notes above in HANDED_OVER_TO_NETWORK about * protocol != C */ req->rq_state |= RQ_NET_OK; D_ASSERT(req->rq_state & RQ_NET_PENDING); @@ -635,7 +635,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done_not_susp(req, m); break; - case neg_acked: + case NEG_ACKED: /* assert something? */ if (req->rq_state & RQ_NET_PENDING) { dec_ap_pending(mdev); @@ -645,17 +645,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_NET_DONE; _req_may_be_done_not_susp(req, m); - /* else: done by handed_over_to_network */ + /* else: done by HANDED_OVER_TO_NETWORK */ break; - case fail_frozen_disk_io: + case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case restart_frozen_disk_io: + case RESTART_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; @@ -670,7 +670,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); break; - case resend: + case RESEND: /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK before the connection loss (B&C only); only P_BARRIER_ACK was missing. 
Trowing them out of the TL here by pretending we got a BARRIER_ACK @@ -682,9 +682,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } break; } - /* else, fall through to barrier_acked */ + /* else, fall through to BARRIER_ACKED */ - case barrier_acked: + case BARRIER_ACKED: if (!(req->rq_state & RQ_WRITE)) break; @@ -692,7 +692,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* barrier came in before all requests have been acked. * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ - dev_err(DEV, "FIXME (barrier_acked but pending)\n"); + dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); list_move(&req->tl_requests, &mdev->out_of_sequence_requests); } if ((req->rq_state & RQ_NET_MASK) != 0) { @@ -703,7 +703,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case data_received: + case DATA_RECEIVED: D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; @@ -924,9 +924,9 @@ allocate_barrier: /* mark them early for readability. * this just sets some state flags. */ if (remote) - _req_mod(req, to_be_send); + _req_mod(req, TO_BE_SENT); if (local) - _req_mod(req, to_be_submitted); + _req_mod(req, TO_BE_SUBMITTED); /* check this request on the collision detection hash tables. * if we have a conflict, just complete it here. @@ -944,11 +944,11 @@ allocate_barrier: * or READ, but not in sync. */ _req_mod(req, (rw == WRITE) - ? queue_for_net_write - : queue_for_net_read); + ? 
QUEUE_FOR_NET_WRITE + : QUEUE_FOR_NET_READ); } if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) - _req_mod(req, queue_for_send_oos); + _req_mod(req, QUEUE_FOR_SEND_OOS); if (remote && mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index ee591749c4d..6dbbe8906c8 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -77,39 +77,39 @@ */ enum drbd_req_event { - created, - to_be_send, - to_be_submitted, + CREATED, + TO_BE_SENT, + TO_BE_SUBMITTED, /* XXX yes, now I am inconsistent... * these are not "events" but "actions" * oh, well... */ - queue_for_net_write, - queue_for_net_read, - queue_for_send_oos, + QUEUE_FOR_NET_WRITE, + QUEUE_FOR_NET_READ, + QUEUE_FOR_SEND_OOS, - send_canceled, - send_failed, - handed_over_to_network, - oos_handed_to_network, - connection_lost_while_pending, - read_retry_remote_canceled, - recv_acked_by_peer, - write_acked_by_peer, - write_acked_by_peer_and_sis, /* and set_in_sync */ - conflict_discarded_by_peer, - neg_acked, - barrier_acked, /* in protocol A and B */ - data_received, /* (remote read) */ + SEND_CANCELED, + SEND_FAILED, + HANDED_OVER_TO_NETWORK, + OOS_HANDED_TO_NETWORK, + CONNECTION_LOST_WHILE_PENDING, + READ_RETRY_REMOTE_CANCELED, + RECV_ACKED_BY_PEER, + WRITE_ACKED_BY_PEER, + WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ + CONFLICT_DISCARDED_BY_PEER, + NEG_ACKED, + BARRIER_ACKED, /* in protocol A and B */ + DATA_RECEIVED, /* (remote read) */ - read_completed_with_error, - read_ahead_completed_with_error, - write_completed_with_error, - completed_ok, - resend, - fail_frozen_disk_io, - restart_frozen_disk_io, - nothing, /* for tracing only */ + READ_COMPLETED_WITH_ERROR, + READ_AHEAD_COMPLETED_WITH_ERROR, + WRITE_COMPLETED_WITH_ERROR, + COMPLETED_OK, + RESEND, + FAIL_FROZEN_DISK_IO, + RESTART_FROZEN_DISK_IO, + NOTHING, }; /* encoding of request states for now. 
we don't actually need that many bits. @@ -138,8 +138,8 @@ enum drbd_req_state_bits { * recv_ack (B) or implicit "ack" (A), * still waiting for the barrier ack. * master_bio may already be completed and invalidated. - * 11100: write_acked (C), - * data_received (for remote read, any protocol) + * 11100: write acked (C), + * data received (for remote read, any protocol) * or finally the barrier ack has arrived (B,A)... * request can be freed * 01100: neg-acked (write, protocol C) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1ddf6b61b20..550617b1a39 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -209,12 +209,12 @@ void drbd_endio_pri(struct bio *bio, int error) /* to avoid recursion in __req_mod */ if (unlikely(error)) { what = (bio_data_dir(bio) == WRITE) - ? write_completed_with_error + ? WRITE_COMPLETED_WITH_ERROR : (bio_rw(bio) == READ) - ? read_completed_with_error - : read_ahead_completed_with_error; + ? 
READ_COMPLETED_WITH_ERROR + : READ_AHEAD_COMPLETED_WITH_ERROR; } else - what = completed_ok; + what = COMPLETED_OK; bio_put(req->private_bio); req->private_bio = ERR_PTR(error); @@ -238,7 +238,7 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) spin_lock_irq(&mdev->req_lock); if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { - _req_mod(req, read_retry_remote_canceled); + _req_mod(req, READ_RETRY_REMOTE_CANCELED); spin_unlock_irq(&mdev->req_lock); return 1; } @@ -1243,12 +1243,12 @@ int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int ok; if (unlikely(cancel)) { - req_mod(req, send_canceled); + req_mod(req, SEND_CANCELED); return 1; } ok = drbd_send_oos(mdev, req); - req_mod(req, oos_handed_to_network); + req_mod(req, OOS_HANDED_TO_NETWORK); return ok; } @@ -1265,12 +1265,12 @@ int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int ok; if (unlikely(cancel)) { - req_mod(req, send_canceled); + req_mod(req, SEND_CANCELED); return 1; } ok = drbd_send_dblock(mdev, req); - req_mod(req, ok ? handed_over_to_network : send_failed); + req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); return ok; } @@ -1287,7 +1287,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int ok; if (unlikely(cancel)) { - req_mod(req, send_canceled); + req_mod(req, SEND_CANCELED); return 1; } @@ -1300,7 +1300,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (mdev->state.conn >= C_CONNECTED) drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); } - req_mod(req, ok ? handed_over_to_network : send_failed); + req_mod(req, ok ? 
HANDED_OVER_TO_NETWORK : SEND_FAILED); return ok; } From e77a0a5cc1e6961f485b5623ef42f3b910969675 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jan 2011 15:43:39 +0100 Subject: [PATCH 022/609] drbd: Convert all constants in enum drbd_thread_state to upper case Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++---- drivers/block/drbd/drbd_main.c | 38 +++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 6 ++--- drivers/block/drbd/drbd_worker.c | 8 +++---- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6099c667b63..1cf9c095490 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -649,10 +649,10 @@ union p_polymorph { /**********************************************************************/ enum drbd_thread_state { - None, - Running, - Exiting, - Restarting + NONE, + RUNNING, + EXITING, + RESTARTING }; struct drbd_thread { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c5bb8714334..19176a149ac 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1627,25 +1627,25 @@ restart: spin_lock_irqsave(&thi->t_lock, flags); - /* if the receiver has been "Exiting", the last thing it did + /* if the receiver has been "EXITING", the last thing it did * was set the conn state to "StandAlone", * if now a re-connect request comes in, conn state goes C_UNCONNECTED, * and receiver thread will be "started". - * drbd_thread_start needs to set "Restarting" in that case. + * drbd_thread_start needs to set "RESTARTING" in that case. * t_state check and assignment needs to be within the same spinlock, - * so either thread_start sees Exiting, and can remap to Restarting, - * or thread_start see None, and can proceed as normal. 
+ * so either thread_start sees EXITING, and can remap to RESTARTING, + * or thread_start see NONE, and can proceed as normal. */ - if (thi->t_state == Restarting) { + if (thi->t_state == RESTARTING) { dev_info(DEV, "Restarting %s\n", current->comm); - thi->t_state = Running; + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; } thi->task = NULL; - thi->t_state = None; + thi->t_state = NONE; smp_mb(); complete(&thi->stop); spin_unlock_irqrestore(&thi->t_lock, flags); @@ -1662,7 +1662,7 @@ static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, { spin_lock_init(&thi->t_lock); thi->task = NULL; - thi->t_state = None; + thi->t_state = NONE; thi->function = func; thi->mdev = mdev; } @@ -1683,7 +1683,7 @@ int drbd_thread_start(struct drbd_thread *thi) spin_lock_irqsave(&thi->t_lock, flags); switch (thi->t_state) { - case None: + case NONE: dev_info(DEV, "Starting %s thread (from %s [%d])\n", me, current->comm, current->pid); @@ -1697,7 +1697,7 @@ int drbd_thread_start(struct drbd_thread *thi) init_completion(&thi->stop); D_ASSERT(thi->task == NULL); thi->reset_cpu_mask = 1; - thi->t_state = Running; + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); flush_signals(current); /* otherw. 
may get -ERESTARTNOINTR */ @@ -1712,17 +1712,17 @@ int drbd_thread_start(struct drbd_thread *thi) } spin_lock_irqsave(&thi->t_lock, flags); thi->task = nt; - thi->t_state = Running; + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); wake_up_process(nt); break; - case Exiting: - thi->t_state = Restarting; + case EXITING: + thi->t_state = RESTARTING; dev_info(DEV, "Restarting %s thread (from %s [%d])\n", me, current->comm, current->pid); /* fall through */ - case Running: - case Restarting: + case RUNNING: + case RESTARTING: default: spin_unlock_irqrestore(&thi->t_lock, flags); break; @@ -1736,12 +1736,12 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) { unsigned long flags; - enum drbd_thread_state ns = restart ? Restarting : Exiting; + enum drbd_thread_state ns = restart ? RESTARTING : EXITING; /* may be called from state engine, holding the req lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); - if (thi->t_state == None) { + if (thi->t_state == NONE) { spin_unlock_irqrestore(&thi->t_lock, flags); if (restart) drbd_thread_start(thi); @@ -2504,7 +2504,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * drop_it = mdev->meta.socket == sock || !mdev->asender.task - || get_t_state(&mdev->asender) != Running + || get_t_state(&mdev->asender) != RUNNING || mdev->state.conn < C_CONNECTED; if (drop_it) @@ -3046,7 +3046,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) void drbd_mdev_cleanup(struct drbd_conf *mdev) { int i; - if (mdev->receiver.t_state != None) + if (mdev->receiver.t_state != NONE) dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", mdev->receiver.t_state); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1762ef0375e..1cfcc44fd48 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -833,7 +833,7 @@ retry: if (signal_pending(current)) { flush_signals(current); smp_rmb(); - 
if (get_t_state(&mdev->receiver) == Exiting) + if (get_t_state(&mdev->receiver) == EXITING) goto out_release_sockets; } @@ -3700,7 +3700,7 @@ static void drbdd(struct drbd_conf *mdev) size_t shs; /* sub header size */ int rv; - while (get_t_state(&mdev->receiver) == Running) { + while (get_t_state(&mdev->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (!drbd_recv_header(mdev, &cmd, &packet_size)) goto err_out; @@ -4490,7 +4490,7 @@ int drbd_asender(struct drbd_thread *thi) current->policy = SCHED_RR; /* Make this a realtime task! */ current->rt_priority = 2; /* more important than all other tasks */ - while (get_t_state(thi) == Running) { + while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (test_and_clear_bit(SEND_PING, &mdev->flags)) { ERR_IF(!drbd_send_ping(mdev)) goto reconnect; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 550617b1a39..c2a9285afad 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1617,7 +1617,7 @@ int drbd_worker(struct drbd_thread *thi) sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); - while (get_t_state(thi) == Running) { + while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (down_trylock(&mdev->data.work.s)) { @@ -1637,12 +1637,12 @@ int drbd_worker(struct drbd_thread *thi) if (intr) { D_ASSERT(intr == -EINTR); flush_signals(current); - ERR_IF (get_t_state(thi) == Running) + ERR_IF (get_t_state(thi) == RUNNING) continue; break; } - if (get_t_state(thi) != Running) + if (get_t_state(thi) != RUNNING) break; /* With this break, we have done a down() but not consumed the entry from the list. The cleanup code takes care of @@ -1704,7 +1704,7 @@ int drbd_worker(struct drbd_thread *thi) D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. - * wait here for the Exiting receiver. */ + * wait here for the EXITING receiver. 
*/ drbd_thread_stop(&mdev->receiver); drbd_mdev_cleanup(mdev); From 841ce241fa355048f66172a47e356bb6e9159c9d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 15 Dec 2010 19:31:20 +0100 Subject: [PATCH 023/609] drbd: Replace the ERR_IF macro with an assert-like macro Remove the file name and line number from the syslog messages generated: we have no duplicate function names, and no function contains the same assertion more than once. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 21 ++++--- drivers/block/drbd/drbd_bitmap.c | 91 +++++++++++++++++++----------- drivers/block/drbd/drbd_int.h | 18 ++++-- drivers/block/drbd/drbd_main.c | 13 +++-- drivers/block/drbd/drbd_nl.c | 8 ++- drivers/block/drbd/drbd_receiver.c | 19 +++++-- drivers/block/drbd/drbd_worker.c | 7 ++- 7 files changed, 114 insertions(+), 63 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 0eb17d3adf2..9284b10e42b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -491,7 +491,8 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) unsigned int trn; rv = drbd_al_read_tr(mdev, bdev, buffer, i); - ERR_IF(rv == 0) goto cancel; + if (!expect(rv != 0)) + goto cancel; if (rv == -1) { mutex_unlock(&mdev->md_io_mutex); return 0; @@ -770,8 +771,10 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) return; - ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + if (!expect(sector < nr_sectors)) + return; + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); @@ -837,10 +840,10 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - 
ERR_IF(sector >= nr_sectors) + if (!expect(sector < nr_sectors)) goto out; - ERR_IF(esector >= nr_sectors) - esector = (nr_sectors-1); + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); @@ -1218,8 +1221,10 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) return; - ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + if (!expect(sector < nr_sectors)) + return; + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 7b976296b56..c756b4dbd13 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -440,7 +440,8 @@ int drbd_bm_init(struct drbd_conf *mdev) sector_t drbd_bm_capacity(struct drbd_conf *mdev) { - ERR_IF(!mdev->bitmap) return 0; + if (!expect(mdev->bitmap)) + return 0; return mdev->bitmap->bm_dev_capacity; } @@ -448,7 +449,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev) */ void drbd_bm_cleanup(struct drbd_conf *mdev) { - ERR_IF (!mdev->bitmap) return; + if (!expect(mdev->bitmap)) + return; bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); kfree(mdev->bitmap); @@ -611,7 +613,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) int err = 0, growing; int opages_vmalloced; - ERR_IF(!b) return -ENOMEM; + if (!expect(b)) + return -ENOMEM; drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); @@ -733,8 +736,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) unsigned long s; unsigned long flags; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); s = 
b->bm_set; @@ -757,8 +762,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) size_t drbd_bm_words(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; return b->bm_words; } @@ -766,7 +773,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev) unsigned long drbd_bm_bits(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; + if (!expect(b)) + return 0; return b->bm_bits; } @@ -787,8 +795,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; if (number == 0) return; WARN_ON(offset >= b->bm_words); @@ -832,8 +842,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); if ((offset >= b->bm_words) || @@ -861,8 +873,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, void drbd_bm_set_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0xff, b->bm_words); @@ -875,8 +889,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev) void drbd_bm_clear_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0, b->bm_words); @@ -1209,8 +1225,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, struct drbd_bitmap *b = mdev->bitmap; unsigned long i = DRBD_END_OF_BITMAP; - 
ERR_IF(!b) return i; - ERR_IF(!b->bm_pages) return i; + if (!expect(b)) + return i; + if (!expect(b->bm_pages)) + return i; spin_lock_irq(&b->bm_lock); if (BM_DONT_TEST & b->bm_flags) @@ -1311,8 +1329,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, struct drbd_bitmap *b = mdev->bitmap; int c = 0; - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) @@ -1437,8 +1457,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) unsigned long *p_addr; int i; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1472,8 +1494,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * robust in case we screwed up elsewhere, in that case pretend there * was one dirty bit in the requested area, so we won't try to do a * local read there (no bitmap probably implies no disk) */ - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 1; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 1; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1486,11 +1510,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi bm_unmap(p_addr); p_addr = bm_map_pidx(b, idx); } - ERR_IF (bitnr >= b->bm_bits) { - dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); - } else { + if (expect(bitnr < b->bm_bits)) c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); - } + else + dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); } if (p_addr) bm_unmap(p_addr); @@ -1520,8 +1543,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) unsigned long flags; unsigned long *p_addr, *bm; - ERR_IF(!b) return 0; 
- ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1553,8 +1578,10 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) unsigned long weight; unsigned long s, e; int count, i, do_now; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irq(&b->bm_lock); if (BM_DONT_SET & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1cf9c095490..03dd7a0b1bc 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -102,12 +102,18 @@ struct drbd_conf; #define D_ASSERT(exp) if (!(exp)) \ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) -#define ERR_IF(exp) if (({ \ - int _b = (exp) != 0; \ - if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n", \ - __func__, #exp, __FILE__, __LINE__); \ - _b; \ - })) +/** + * expect - Make an assertion + * + * Unlike the assert macro, this macro returns a boolean result. + */ +#define expect(exp) ({ \ + bool _bool = (exp); \ + if (!_bool) \ + dev_err(DEV, "ASSERTION %s FAILED in %s\n", \ + #exp, __func__); \ + _bool; \ + }) /* Defines to control fault insertion */ enum { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 19176a149ac..46ba4aa03f3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1810,7 +1810,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) p == mdev->receiver.task ? &mdev->receiver : p == mdev->worker.task ? 
&mdev->worker : NULL; - ERR_IF(thi == NULL) + if (!expect(thi != NULL)) return; if (!thi->reset_cpu_mask) return; @@ -1826,8 +1826,10 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, { int sent, ok; - ERR_IF(!h) return false; - ERR_IF(!size) return false; + if (!expect(h)) + return false; + if (!expect(size)) + return false; h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); @@ -2300,7 +2302,8 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) struct p_header80 *p; int err; - ERR_IF(!mdev->bitmap) return false; + if (!expect(mdev->bitmap)) + return false; /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? */ @@ -3255,7 +3258,7 @@ static void drbd_delete_device(unsigned int minor) dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, __FILE__ , __LINE__); - ERR_IF (!list_empty(&mdev->data.work.q)) { + if (!expect(list_empty(&mdev->data.work.q))) { struct list_head *lp; list_for_each(lp, &mdev->data.work.q) { dev_err(DEV, "lp = %p\n", lp); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1840cbb8a10..51da84940a3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -751,7 +751,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) unsigned int in_use; int i; - ERR_IF(mdev->sync_conf.al_extents < 7) + if (!expect(mdev->sync_conf.al_extents >= 7)) mdev->sync_conf.al_extents = 127; if (mdev->act_log && @@ -1804,8 +1804,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } } - ERR_IF (sc.rate < 1) sc.rate = 1; - ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */ + if (!expect(sc.rate >= 1)) + sc.rate = 1; + if (!expect(sc.al_extents >= 7)) + sc.al_extents = 127; /* arbitrary minimum */ #define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) if (sc.al_extents > AL_MAX) { dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c index 1cfcc44fd48..a41b07820dd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1260,9 +1260,12 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ data_size -= dgs; - ERR_IF(data_size == 0) return NULL; - ERR_IF(data_size & 0x1ff) return NULL; - ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL; + if (!expect(data_size != 0)) + return NULL; + if (!expect(IS_ALIGNED(data_size, 512))) + return NULL; + if (!expect(data_size <= DRBD_MAX_BIO_SIZE)) + return NULL; /* even though we trust out peer, * we sometimes have to double check. */ @@ -3615,7 +3618,8 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned while (size > 0) { want = min_t(int, size, sizeof(sink)); r = drbd_recv(mdev, sink, want); - ERR_IF(r <= 0) break; + if (!expect(r > 0)) + break; size -= r; } return size == 0; @@ -4493,7 +4497,10 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (test_and_clear_bit(SEND_PING, &mdev->flags)) { - ERR_IF(!drbd_send_ping(mdev)) goto reconnect; + if (!drbd_send_ping(mdev)) { + dev_err(DEV, "drbd_send_ping has failed\n"); + goto reconnect; + } mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; @@ -4587,7 +4594,7 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header80)) + if (!expect(len == expect - sizeof(struct p_header80))) goto reconnect; } if (received == expect) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c2a9285afad..2e2c0659a3e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1331,7 +1331,8 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) if (odev->sync_conf.after == -1) return 1; odev = minor_to_mdev(odev->sync_conf.after); - ERR_IF(!odev) 
return 1; + if (!expect(odev)) + return 1; if ((odev->state.conn >= C_SYNC_SOURCE && odev->state.conn <= C_PAUSED_SYNC_T) || odev->state.aftr_isp || odev->state.peer_isp || @@ -1637,7 +1638,7 @@ int drbd_worker(struct drbd_thread *thi) if (intr) { D_ASSERT(intr == -EINTR); flush_signals(current); - ERR_IF (get_t_state(thi) == RUNNING) + if (!expect(get_t_state(thi) != RUNNING)) continue; break; } @@ -1650,7 +1651,7 @@ int drbd_worker(struct drbd_thread *thi) w = NULL; spin_lock_irq(&mdev->data.work.q_lock); - ERR_IF(list_empty(&mdev->data.work.q)) { + if (!expect(!list_empty(&mdev->data.work.q))) { /* something terribly wrong in our logic. * we were able to down() the semaphore, * but the list is empty... doh. From 70dc65e1b3453c5b78ab8ec6bfb604aee7038ae3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 21 Dec 2010 14:46:57 +0100 Subject: [PATCH 024/609] drbd: Remove some useless paranoia code The open_cnt check is an open-coded D_ASSERT() check. In case the data.work queue is not empty, it does not really help to know which drbd_work elements remained on that list: they will be freed immediately afterwards, anyway. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 46ba4aa03f3..2902f6dd7bf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3254,16 +3254,8 @@ static void drbd_delete_device(unsigned int minor) return; /* paranoia asserts */ - if (mdev->open_cnt != 0) - dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, - __FILE__ , __LINE__); - - if (!expect(list_empty(&mdev->data.work.q))) { - struct list_head *lp; - list_for_each(lp, &mdev->data.work.q) { - dev_err(DEV, "lp = %p\n", lp); - } - }; + D_ASSERT(mdev->open_cnt == 0); + D_ASSERT(list_empty(&mdev->data.work.q)); /* end paranoia asserts */ del_gendisk(mdev->vdisk); From e3cfa7b26a56cb9a3361034cc74f2aaad45d0987 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jan 2011 16:36:10 +0100 Subject: [PATCH 025/609] drbd: Inline function overlaps() is now unused Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 6dbbe8906c8..9d75647cae8 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -260,11 +260,6 @@ static inline void drbd_req_free(struct drbd_request *req) mempool_free(req, drbd_request_mempool); } -static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) -{ - return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); -} - /* Short lived temporary struct on the stack. * We could squirrel the error to be returned into * bio->bi_size, or similar. But that would be too ugly. 
*/ From 6618bf16384463c0b97a5f5f1f0ce5276f5865fd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 13:06:08 +0100 Subject: [PATCH 026/609] drbd: Interval tree bugfix Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 2511dd9993f..b77a9bda03d 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -58,8 +58,9 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) new = &(*new)->rb_right; else if (this < here) new = &(*new)->rb_left; - else if (this->sector > here->sector) + else if (this > here) new = &(*new)->rb_right; + else return false; } From 9749f30f1a387070e6e8351f35aeb829eacc3ab6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 20 Jul 2011 14:59:37 +0200 Subject: [PATCH 027/609] idr: idr_for_each_entry() macro Inspired by the list_for_each_entry() macro --- include/linux/idr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index 255491cf522..52a9da29529 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id); void __init idr_init_cache(void); +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + #endif /* __IDR_H__ */ From 2111438b30a509cfe8a1595d7fad304308ff2466 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 12:26:59 +0100 Subject: [PATCH 028/609] drbd: Minimal struct drbd_tconn Starting to dissolve the network 
connection from the actual block devices. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 15 ++++++++++++ drivers/block/drbd/drbd_main.c | 45 ++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 03dd7a0b1bc..1f486f001df 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -94,6 +94,7 @@ extern char usermode_helper[]; #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) struct drbd_conf; +struct drbd_tconn; /* to shorten dev_warn(DEV, "msg"); and relatives statements */ @@ -960,7 +961,18 @@ struct fifo_buffer { unsigned int size; }; +struct drbd_tconn { /* is a resource from the config file */ + char *name; /* Resource name */ + struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ + struct drbd_conf *volume0; /* TODO: Remove me again */ + + struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ +}; + struct drbd_conf { + struct drbd_tconn *tconn; + int vnr; /* volume number within the connection */ + /* things that are stored as / read from meta data on disk */ unsigned long flags; @@ -1496,6 +1508,9 @@ extern rwlock_t global_state_lock; extern struct drbd_conf *drbd_new_device(unsigned int minor); extern void drbd_free_mdev(struct drbd_conf *mdev); +struct drbd_tconn *drbd_new_tconn(char *name); +extern void drbd_free_tconn(struct drbd_tconn *tconn); + extern int proc_details; /* drbd_req */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2902f6dd7bf..a6ac0c81406 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -132,6 +132,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 * as member "struct gendisk *vdisk;" */ struct drbd_conf **minor_table; +struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache 
*drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* epoch entries */ @@ -3267,6 +3268,7 @@ static void drbd_delete_device(unsigned int minor) bdput(mdev->this_bdev); drbd_free_resources(mdev); + drbd_free_tconn(mdev->tconn); drbd_release_ee_lists(mdev); @@ -3358,6 +3360,41 @@ out: return r; } +struct drbd_tconn *drbd_new_tconn(char *name) +{ + struct drbd_tconn *tconn; + + tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL); + if (!tconn) + return NULL; + + tconn->name = kstrdup(name, GFP_KERNEL); + if (!tconn->name) + goto fail; + + write_lock_irq(&global_state_lock); + list_add(&tconn->all_tconn, &drbd_tconns); + write_unlock_irq(&global_state_lock); + + return tconn; + +fail: + kfree(tconn->name); + kfree(tconn); + + return NULL; +} + +void drbd_free_tconn(struct drbd_tconn *tconn) +{ + write_lock_irq(&global_state_lock); + list_del(&tconn->all_tconn); + write_unlock_irq(&global_state_lock); + + kfree(tconn->name); + kfree(tconn); +} + struct drbd_conf *drbd_new_device(unsigned int minor) { struct drbd_conf *mdev; @@ -3368,9 +3405,14 @@ struct drbd_conf *drbd_new_device(unsigned int minor) mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) return NULL; + mdev->tconn = drbd_new_tconn("dummy"); + if (!mdev->tconn) + goto out_no_tconn; + if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; + mdev->tconn->volume0 = mdev; mdev->minor = minor; drbd_init_set_defaults(mdev); @@ -3447,6 +3489,8 @@ out_no_disk: out_no_q: free_cpumask_var(mdev->cpu_mask); out_no_cpumask: + drbd_free_tconn(mdev->tconn); +out_no_tconn: kfree(mdev); return NULL; } @@ -3526,6 +3570,7 @@ int __init drbd_init(void) } rwlock_init(&global_state_lock); + INIT_LIST_HEAD(&drbd_tconns); printk(KERN_INFO "drbd: initialized. 
" "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", From 89e58e755e37137135c28a90c93be1b28faff485 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 13:12:45 +0100 Subject: [PATCH 029/609] drbd: moved net_conf from mdev to tconn Besides moving the struct member, everything else is generated by: sed -i -e 's/mdev->net_conf/mdev->tconn->net_conf/g' \ -e 's/odev->net_conf/odev->tconn->net_conf/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 +- drivers/block/drbd/drbd_main.c | 28 +++---- drivers/block/drbd/drbd_nl.c | 28 +++---- drivers/block/drbd/drbd_proc.c | 4 +- drivers/block/drbd/drbd_receiver.c | 124 ++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 22 ++--- drivers/block/drbd/drbd_worker.c | 8 +- 7 files changed, 110 insertions(+), 111 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1f486f001df..4c4c276e0eb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -977,7 +977,6 @@ struct drbd_conf { unsigned long flags; /* configured by drbdsetup */ - struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ struct syncer_conf sync_conf; struct drbd_backing_dev *ldev __protected_by(local); @@ -2134,10 +2133,10 @@ static inline void put_net_conf(struct drbd_conf *mdev) } /** - * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there + * get_net_conf() - Increase ref count on mdev->tconn->net_conf; Returns 0 if nothing there * @mdev: DRBD device. * - * You have to call put_net_conf() when finished working with mdev->net_conf. + * You have to call put_net_conf() when finished working with mdev->tconn->net_conf. 
*/ static inline int get_net_conf(struct drbd_conf *mdev) { @@ -2253,7 +2252,7 @@ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) { int mxb = 1000000; /* arbitrary limit on open requests */ if (get_net_conf(mdev)) { - mxb = mdev->net_conf->max_buffers; + mxb = mdev->tconn->net_conf->max_buffers; put_net_conf(mdev); } return mxb; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a6ac0c81406..7e88a49d344 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -693,7 +693,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } if (get_net_conf(mdev)) { - if (!mdev->net_conf->two_primaries && + if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY && ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; put_net_conf(mdev); @@ -1952,7 +1952,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) size = sizeof(struct p_protocol); if (mdev->agreed_pro_version >= 87) - size += strlen(mdev->net_conf->integrity_alg) + 1; + size += strlen(mdev->tconn->net_conf->integrity_alg) + 1; /* we must not recurse into our own queue, * as that is blocked during handshake */ @@ -1960,16 +1960,16 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (p == NULL) return 0; - p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol); - p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); - p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); - p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); - p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); + p->protocol = cpu_to_be32(mdev->tconn->net_conf->wire_protocol); + p->after_sb_0p = cpu_to_be32(mdev->tconn->net_conf->after_sb_0p); + p->after_sb_1p = cpu_to_be32(mdev->tconn->net_conf->after_sb_1p); + p->after_sb_2p = cpu_to_be32(mdev->tconn->net_conf->after_sb_2p); + p->two_primaries = cpu_to_be32(mdev->tconn->net_conf->two_primaries); cf = 0; - if (mdev->net_conf->want_lose) + if (mdev->tconn->net_conf->want_lose) cf |= 
CF_WANT_LOSE; - if (mdev->net_conf->dry_run) { + if (mdev->tconn->net_conf->dry_run) { if (mdev->agreed_pro_version >= 92) cf |= CF_DRY_RUN; else { @@ -1981,7 +1981,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) p->conn_flags = cpu_to_be32(cf); if (mdev->agreed_pro_version >= 87) - strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); + strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, (struct p_header80 *)p, size); @@ -2002,7 +2002,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) mdev->comm_bm_set = drbd_bm_total_weight(mdev); p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); - uuid_flags |= mdev->net_conf->want_lose ? 1 : 0; + uuid_flags |= mdev->tconn->net_conf->want_lose ? 1 : 0; uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); @@ -2717,7 +2717,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * out ok after sending on this side, but does not fit on the * receiving side, we sure have detected corruption elsewhere. 
*/ - if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs) + if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs) ok = _drbd_send_bio(mdev, req->master_bio); else ok = _drbd_send_zc_bio(mdev, req->master_bio); @@ -2843,7 +2843,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (sock == mdev->data.socket) { - mdev->ko_count = mdev->net_conf->ko_count; + mdev->ko_count = mdev->tconn->net_conf->ko_count; drbd_update_congested(mdev); } do { @@ -3073,7 +3073,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) mdev->rs_mark_left[i] = 0; mdev->rs_mark_time[i] = 0; } - D_ASSERT(mdev->net_conf == NULL); + D_ASSERT(mdev->tconn->net_conf == NULL); drbd_set_my_capacity(mdev, 0); if (mdev->bitmap) { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 51da84940a3..d816c61cd98 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -150,21 +150,21 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); if (get_net_conf(mdev)) { - switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) { + switch (((struct sockaddr *)mdev->tconn->net_conf->peer_addr)->sa_family) { case AF_INET6: afs = "ipv6"; snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr); + &((struct sockaddr_in6 *)mdev->tconn->net_conf->peer_addr)->sin6_addr); break; case AF_INET: afs = "ipv4"; snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + &((struct sockaddr_in *)mdev->tconn->net_conf->peer_addr)->sin_addr); break; default: afs = "ssocks"; snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + &((struct sockaddr_in *)mdev->tconn->net_conf->peer_addr)->sin_addr); } snprintf(af, 20, "DRBD_PEER_AF=%s", afs); envp[3]=af; @@ -379,7 +379,7 @@ drbd_set_role(struct drbd_conf 
*mdev, enum drbd_role new_role, int force) if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ - schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10); + schedule_timeout_interruptible((mdev->tconn->net_conf->ping_timeo+1)*HZ/10); if (try < max_tries) try = max_tries - 1; continue; @@ -410,7 +410,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } } else { if (get_net_conf(mdev)) { - mdev->net_conf->want_lose = 0; + mdev->tconn->net_conf->want_lose = 0; put_net_conf(mdev); } set_disk_ro(mdev->vdisk, false); @@ -972,7 +972,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } if (get_net_conf(mdev)) { - int prot = mdev->net_conf->wire_protocol; + int prot = mdev->tconn->net_conf->wire_protocol; put_net_conf(mdev); if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { retcode = ERR_STONITH_AND_PROT_A; @@ -1439,13 +1439,13 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, if (!odev || odev == mdev) continue; if (get_net_conf(odev)) { - taken_addr = (struct sockaddr *)&odev->net_conf->my_addr; - if (new_conf->my_addr_len == odev->net_conf->my_addr_len && + taken_addr = (struct sockaddr *)&odev->tconn->net_conf->my_addr; + if (new_conf->my_addr_len == odev->tconn->net_conf->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) retcode = ERR_LOCAL_ADDR; - taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr; - if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len && + taken_addr = (struct sockaddr *)&odev->tconn->net_conf->peer_addr; + if (new_conf->peer_addr_len == odev->tconn->net_conf->peer_addr_len && !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = ERR_PEER_ADDR; @@ -1522,12 +1522,12 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, drbd_flush_workqueue(mdev); spin_lock_irq(&mdev->req_lock); - if 
(mdev->net_conf != NULL) { + if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; spin_unlock_irq(&mdev->req_lock); goto fail; } - mdev->net_conf = new_conf; + mdev->tconn->net_conf = new_conf; mdev->send_cnt = 0; mdev->recv_cnt = 0; @@ -2051,7 +2051,7 @@ static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } if (get_net_conf(mdev)) { - tl = net_conf_to_tags(mdev, mdev->net_conf, tl); + tl = net_conf_to_tags(mdev, mdev->tconn->net_conf, tl); put_net_conf(mdev); } tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 2959cdfb77f..4e53cb3d99e 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -254,8 +254,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) drbd_role_str(mdev->state.peer), drbd_disk_str(mdev->state.disk), drbd_disk_str(mdev->state.pdsk), - (mdev->net_conf == NULL ? ' ' : - (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), + (mdev->tconn->net_conf == NULL ? ' ' : + (mdev->tconn->net_conf->wire_protocol - DRBD_PROT_A+'A')), is_susp(mdev->state) ? 's' : 'r', mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a41b07820dd..e5e7dd1c6dd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -237,7 +237,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool /* Yes, we may run up to @number over max_buffers. If we * follow it strictly, the admin will get it wrong anyways. 
*/ - if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) + if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) page = drbd_pp_first_pages_or_try_alloc(mdev, number); while (page == NULL) { @@ -245,7 +245,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool drbd_kick_lo_and_reclaim_net(mdev); - if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { + if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) { page = drbd_pp_first_pages_or_try_alloc(mdev, number); if (page) break; @@ -582,7 +582,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -590,9 +590,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; - drbd_setbufsize(sock, mdev->net_conf->sndbuf_size, - mdev->net_conf->rcvbuf_size); + sock->sk->sk_sndtimeo = mdev->tconn->net_conf->try_connect_int*HZ; + drbd_setbufsize(sock, mdev->tconn->net_conf->sndbuf_size, + mdev->tconn->net_conf->rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -601,9 +601,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) * Make sure to use 0 as port number, so linux selects * a free one dynamically. 
*/ - memcpy(&src_in6, mdev->net_conf->my_addr, - min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6))); - if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6) + memcpy(&src_in6, mdev->tconn->net_conf->my_addr, + min_t(int, mdev->tconn->net_conf->my_addr_len, sizeof(src_in6))); + if (((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ @@ -611,7 +611,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) what = "bind before connect"; err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, - mdev->net_conf->my_addr_len); + mdev->tconn->net_conf->my_addr_len); if (err < 0) goto out; @@ -620,8 +620,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) disconnect_on_error = 0; what = "connect"; err = sock->ops->connect(sock, - (struct sockaddr *)mdev->net_conf->peer_addr, - mdev->net_conf->peer_addr_len, 0); + (struct sockaddr *)mdev->tconn->net_conf->peer_addr, + mdev->tconn->net_conf->peer_addr_len, 0); out: if (err < 0) { @@ -658,26 +658,26 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; goto out; } - timeo = mdev->net_conf->try_connect_int * HZ; + timeo = mdev->tconn->net_conf->try_connect_int * HZ; timeo += (random32() & 1) ? 
timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; - drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size, - mdev->net_conf->rcvbuf_size); + drbd_setbufsize(s_listen, mdev->tconn->net_conf->sndbuf_size, + mdev->tconn->net_conf->rcvbuf_size); what = "bind before listen"; err = s_listen->ops->bind(s_listen, - (struct sockaddr *) mdev->net_conf->my_addr, - mdev->net_conf->my_addr_len); + (struct sockaddr *) mdev->tconn->net_conf->my_addr, + mdev->tconn->net_conf->my_addr_len); if (err < 0) goto out; @@ -791,7 +791,7 @@ static int drbd_connect(struct drbd_conf *mdev) } if (sock && msock) { - schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10); + schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10); ok = drbd_socket_okay(mdev, &sock); ok = drbd_socket_okay(mdev, &msock) && ok; if (ok) @@ -855,15 +855,15 @@ retry: msock->sk->sk_priority = TC_PRIO_INTERACTIVE; /* NOT YET ... - * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + * sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * first set it to the P_HAND_SHAKE timeout, * which we set to 4x the configured ping_timeout. */ sock->sk->sk_sndtimeo = - sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10; + sock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*4*HZ/10; - msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; - msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + msock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; + msock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; /* we don't want delays. 
* we use TCP_CORK where appropriate, though */ @@ -895,7 +895,7 @@ retry: if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) return 0; - sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; atomic_set(&mdev->packet_seq, 0); @@ -1555,7 +1555,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) sector_t sector = e->i.sector; int ok = 1, pcmd; - if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { + if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) { if (likely((e->flags & EE_WAS_ERROR) == 0)) { pcmd = (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T && @@ -1573,7 +1573,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ - if (mdev->net_conf->two_primaries) { + if (mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); @@ -1592,7 +1592,7 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int ok = 1; - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); @@ -1717,7 +1717,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_unlock(&mdev->epoch_lock); /* I'm the receiver, I do hold a net_cnt reference. 
*/ - if (!mdev->net_conf->two_primaries) { + if (!mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); } else { /* don't get the req_lock yet, @@ -1727,7 +1727,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned DEFINE_WAIT(wait); int first; - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); /* conflict detection and handling: * 1. wait on the sequence number, @@ -1845,7 +1845,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned list_add(&e->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->req_lock); - switch (mdev->net_conf->wire_protocol) { + switch (mdev->tconn->net_conf->wire_protocol) { case DRBD_PROT_C: inc_unacked(mdev); /* corresponding dec_unacked() in e_end_block() @@ -2153,7 +2153,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) ch_peer = mdev->p_uuid[UI_SIZE]; ch_self = mdev->comm_bm_set; - switch (mdev->net_conf->after_sb_0p) { + switch (mdev->tconn->net_conf->after_sb_0p) { case ASB_CONSENSUS: case ASB_DISCARD_SECONDARY: case ASB_CALL_HELPER: @@ -2192,7 +2192,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) if (ch_peer == 0) { rv = 1; break; } if (ch_self == 0) { rv = -1; break; } } - if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) + if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) break; case ASB_DISCARD_LEAST_CHG: if (ch_self < ch_peer) @@ -2218,7 +2218,7 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) { int hg, rv = -100; - switch (mdev->net_conf->after_sb_1p) { + switch (mdev->tconn->net_conf->after_sb_1p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: @@ -2267,7 +2267,7 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) { int hg, rv = -100; - switch (mdev->net_conf->after_sb_2p) { + switch 
(mdev->tconn->net_conf->after_sb_2p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: @@ -2558,7 +2558,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (abs(hg) == 100) drbd_khelper(mdev, "initial-split-brain"); - if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { + if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) { int pcount = (mdev->state.role == R_PRIMARY) + (peer_role == R_PRIMARY); int forced = (hg == -100); @@ -2587,9 +2587,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -2615,7 +2615,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (hg < 0 && /* by intention we do not use mydisk here. 
*/ mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { - switch (mdev->net_conf->rr_conflict) { + switch (mdev->tconn->net_conf->rr_conflict) { case ASB_CALL_HELPER: drbd_khelper(mdev, "pri-lost"); /* fall through */ @@ -2628,7 +2628,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { + if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else @@ -2701,38 +2701,38 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig if (cf & CF_DRY_RUN) set_bit(CONN_DRY_RUN, &mdev->flags); - if (p_proto != mdev->net_conf->wire_protocol) { + if (p_proto != mdev->tconn->net_conf->wire_protocol) { dev_err(DEV, "incompatible communication protocols\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) { + if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) { dev_err(DEV, "incompatible after-sb-0pri settings\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) { + if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) { dev_err(DEV, "incompatible after-sb-1pri settings\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) { + if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) { dev_err(DEV, "incompatible after-sb-2pri settings\n"); goto disconnect; } - if (p_want_lose && mdev->net_conf->want_lose) { + if (p_want_lose && mdev->tconn->net_conf->want_lose) { dev_err(DEV, "both sides have the 'want_lose' flag set\n"); goto disconnect; } - if (p_two_primaries != mdev->net_conf->two_primaries) { + if (p_two_primaries != mdev->tconn->net_conf->two_primaries) { dev_err(DEV, "incompatible setting of the two-primaries options\n"); goto disconnect; } if 
(mdev->agreed_pro_version >= 87) { - unsigned char *my_alg = mdev->net_conf->integrity_alg; + unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg; if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) return false; @@ -3312,7 +3312,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } } - mdev->net_conf->want_lose = 0; + mdev->tconn->net_conf->want_lose = 0; drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ @@ -3844,8 +3844,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) crypto_free_hash(mdev->cram_hmac_tfm); mdev->cram_hmac_tfm = NULL; - kfree(mdev->net_conf); - mdev->net_conf = NULL; + kfree(mdev->tconn->net_conf); + mdev->tconn->net_conf = NULL; drbd_request_state(mdev, NS(conn, C_STANDALONE)); } @@ -4005,7 +4005,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - unsigned int key_len = strlen(mdev->net_conf->shared_secret); + unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; enum drbd_packets cmd; @@ -4016,7 +4016,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) desc.flags = 0; rv = crypto_hash_setkey(mdev->cram_hmac_tfm, - (u8 *)mdev->net_conf->shared_secret, key_len); + (u8 *)mdev->tconn->net_conf->shared_secret, key_len); if (rv) { dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); rv = -1; @@ -4130,7 +4130,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (rv) dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", - resp_size, mdev->net_conf->cram_hmac_alg); + resp_size, mdev->tconn->net_conf->cram_hmac_alg); else rv = -1; @@ -4207,7 +4207,7 @@ static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ - mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + mdev->meta.socket->sk->sk_rcvtimeo = 
mdev->tconn->net_conf->ping_int*HZ; if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) wake_up(&mdev->misc_wait); @@ -4275,19 +4275,19 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } switch (be16_to_cpu(h->command)) { case P_RS_WRITE_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER_AND_SIS; break; case P_WRITE_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER; break; case P_RECV_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B); what = RECV_ACKED_BY_PEER; break; case P_DISCARD_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = CONFLICT_DISCARDED_BY_PEER; break; default: @@ -4305,8 +4305,8 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); - bool missing_ok = mdev->net_conf->wire_protocol == DRBD_PROT_A || - mdev->net_conf->wire_protocol == DRBD_PROT_B; + bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || + mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B; bool found; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4502,13 +4502,13 @@ int drbd_asender(struct drbd_thread *thi) goto reconnect; } mdev->meta.socket->sk->sk_rcvtimeo = - mdev->net_conf->ping_timeo*HZ/10; + mdev->tconn->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; } /* conditionally cork; * it may hurt latency if we cork without much to send */ - if (!mdev->net_conf->no_cork && + if (!mdev->tconn->net_conf->no_cork && 3 < atomic_read(&mdev->unacked_cnt)) drbd_tcp_cork(mdev->meta.socket); while (1) { @@ -4528,7 +4528,7 @@ int 
drbd_asender(struct drbd_thread *thi) break; } /* but unconditionally uncork unless disabled */ - if (!mdev->net_conf->no_cork) + if (!mdev->tconn->net_conf->no_cork) drbd_tcp_uncork(mdev->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b3b1d4edbb0..2b2662d4ab3 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -528,7 +528,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size) + if (mdev->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) queue_barrier(mdev); break; @@ -558,7 +558,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, atomic_add(req->i.size >> 9, &mdev->ap_in_flight); if (bio_data_dir(req->master_bio) == WRITE && - mdev->net_conf->wire_protocol == DRBD_PROT_A) { + mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A) { /* this is what is dangerous about protocol A: * pretend it was successfully written on the peer. 
*/ if (req->rq_state & RQ_NET_PENDING) { @@ -697,8 +697,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; - if (mdev->net_conf->wire_protocol == DRBD_PROT_A) - atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A) + atomic_sub(req->i.size>>9, &mdev->ap_in_flight); } _req_may_be_done(req, m); /* Allowed while state.susp */ break; @@ -951,16 +951,16 @@ allocate_barrier: _req_mod(req, QUEUE_FOR_SEND_OOS); if (remote && - mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { + mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { int congested = 0; - if (mdev->net_conf->cong_fill && - atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { + if (mdev->tconn->net_conf->cong_fill && + atomic_read(&mdev->ap_in_flight) >= mdev->tconn->net_conf->cong_fill) { dev_info(DEV, "Congestion-fill threshold reached\n"); congested = 1; } - if (mdev->act_log->used >= mdev->net_conf->cong_extents) { + if (mdev->act_log->used >= mdev->tconn->net_conf->cong_extents) { dev_info(DEV, "Congestion-extents threshold reached\n"); congested = 1; } @@ -968,9 +968,9 @@ allocate_barrier: if (congested) { queue_barrier(mdev); /* last barrier, after mirrored writes */ - if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) + if (mdev->tconn->net_conf->on_congestion == OC_PULL_AHEAD) _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); - else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ + else /*mdev->tconn->net_conf->on_congestion == OC_DISCONNECT */ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); } } @@ -1182,7 +1182,7 @@ void request_timer_fn(unsigned long data) unsigned long et = 0; /* effective timeout = ko_count * timeout */ if (get_net_conf(mdev)) { - et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; + et = mdev->tconn->net_conf->timeout*HZ/10 * 
mdev->tconn->net_conf->ko_count; put_net_conf(mdev); } if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2e2c0659a3e..d8c61816d10 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1590,8 +1590,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) * the race considerably, but does not solve it. */ if (side == C_SYNC_SOURCE) schedule_timeout_interruptible( - mdev->net_conf->ping_int * HZ + - mdev->net_conf->ping_timeo*HZ/9); + mdev->tconn->net_conf->ping_int * HZ + + mdev->tconn->net_conf->ping_timeo*HZ/9); drbd_resync_finished(mdev); } @@ -1623,14 +1623,14 @@ int drbd_worker(struct drbd_thread *thi) if (down_trylock(&mdev->data.work.s)) { mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->net_conf->no_cork) + if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) drbd_tcp_uncork(mdev->data.socket); mutex_unlock(&mdev->data.mutex); intr = down_interruptible(&mdev->data.work.s); mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->net_conf->no_cork) + if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) drbd_tcp_cork(mdev->data.socket); mutex_unlock(&mdev->data.mutex); } From b2fb6dbe52dafa3cd18e0665937a0ebcc0892b92 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 13:48:44 +0100 Subject: [PATCH 030/609] drbd: moved net_cont and net_cnt_wait from mdev to tconn Patch partly generated by: sed -i -e 's/get_net_conf(mdev)/get_net_conf(mdev->tconn)/g' \ -e 's/put_net_conf(mdev)/put_net_conf(mdev->tconn)/g' \ -e 's/get_net_conf(odev)/get_net_conf(odev->tconn)/g' \ -e 's/put_net_conf(odev)/put_net_conf(odev->tconn)/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 23 ++++++++++++----------- drivers/block/drbd/drbd_main.c | 9 +++++---- drivers/block/drbd/drbd_nl.c | 20 ++++++++++---------- 
drivers/block/drbd/drbd_receiver.c | 14 +++++++------- drivers/block/drbd/drbd_req.c | 10 +++++----- 5 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4c4c276e0eb..fd015502c62 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -967,6 +967,8 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_conf *volume0; /* TODO: Remove me again */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ + atomic_t net_cnt; /* Users of net_conf */ + wait_queue_head_t net_cnt_wait; }; struct drbd_conf { @@ -1012,7 +1014,6 @@ struct drbd_conf { union drbd_state state; wait_queue_head_t misc_wait; wait_queue_head_t state_wait; /* upon each state change. */ - wait_queue_head_t net_cnt_wait; unsigned int send_cnt; unsigned int recv_cnt; unsigned int read_cnt; @@ -1024,7 +1025,7 @@ struct drbd_conf { atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ atomic_t unacked_cnt; /* Need to send replys for */ atomic_t local_cnt; /* Waiting for local completion */ - atomic_t net_cnt; /* Users of net_conf */ + spinlock_t req_lock; struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ struct drbd_tl_epoch *newest_tle; @@ -2126,10 +2127,10 @@ static inline void inc_unacked(struct drbd_conf *mdev) ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) -static inline void put_net_conf(struct drbd_conf *mdev) +static inline void put_net_conf(struct drbd_tconn *tconn) { - if (atomic_dec_and_test(&mdev->net_cnt)) - wake_up(&mdev->net_cnt_wait); + if (atomic_dec_and_test(&tconn->net_cnt)) + wake_up(&tconn->net_cnt_wait); } /** @@ -2138,14 +2139,14 @@ static inline void put_net_conf(struct drbd_conf *mdev) * * You have to call put_net_conf() when finished working with mdev->tconn->net_conf. 
*/ -static inline int get_net_conf(struct drbd_conf *mdev) +static inline int get_net_conf(struct drbd_tconn *tconn) { int have_net_conf; - atomic_inc(&mdev->net_cnt); - have_net_conf = mdev->state.conn >= C_UNCONNECTED; + atomic_inc(&tconn->net_cnt); + have_net_conf = tconn->volume0->state.conn >= C_UNCONNECTED; if (!have_net_conf) - put_net_conf(mdev); + put_net_conf(tconn); return have_net_conf; } @@ -2251,9 +2252,9 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, static inline int drbd_get_max_buffers(struct drbd_conf *mdev) { int mxb = 1000000; /* arbitrary limit on open requests */ - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { mxb = mdev->tconn->net_conf->max_buffers; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } return mxb; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7e88a49d344..9a77a9b950d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -692,11 +692,11 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) put_ldev(mdev); } - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY && ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } if (rv <= 0) @@ -2972,7 +2972,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); atomic_set(&mdev->unacked_cnt, 0); atomic_set(&mdev->local_cnt, 0); - atomic_set(&mdev->net_cnt, 0); atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); atomic_set(&mdev->pp_in_use_by_net, 0); @@ -3031,7 +3030,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->misc_wait); init_waitqueue_head(&mdev->state_wait); - init_waitqueue_head(&mdev->net_cnt_wait); init_waitqueue_head(&mdev->ee_wait); init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); @@ -3372,6 +3370,9 @@ struct drbd_tconn 
*drbd_new_tconn(char *name) if (!tconn->name) goto fail; + atomic_set(&tconn->net_cnt, 0); + init_waitqueue_head(&tconn->net_cnt_wait); + write_lock_irq(&global_state_lock); list_add(&tconn->all_tconn, &drbd_tconns); write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d816c61cd98..a936d61a90c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -149,7 +149,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { switch (((struct sockaddr *)mdev->tconn->net_conf->peer_addr)->sa_family) { case AF_INET6: afs = "ipv6"; @@ -169,7 +169,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) snprintf(af, 20, "DRBD_PEER_AF=%s", afs); envp[3]=af; envp[4]=ad; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } /* The helper may take some time. @@ -409,9 +409,9 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) put_ldev(mdev); } } else { - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { mdev->tconn->net_conf->want_lose = 0; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } set_disk_ro(mdev->vdisk, false); if (get_ldev(mdev)) { @@ -971,9 +971,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { int prot = mdev->tconn->net_conf->wire_protocol; - put_net_conf(mdev); + put_net_conf(mdev->tconn); if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { retcode = ERR_STONITH_AND_PROT_A; goto fail; @@ -1438,7 +1438,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, odev = minor_to_mdev(i); if (!odev || odev == mdev) continue; - if (get_net_conf(odev)) { + if (get_net_conf(odev->tconn)) { taken_addr = (struct sockaddr *)&odev->tconn->net_conf->my_addr; if (new_conf->my_addr_len == 
odev->tconn->net_conf->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) @@ -1449,7 +1449,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = ERR_PEER_ADDR; - put_net_conf(odev); + put_net_conf(odev->tconn); if (retcode != NO_ERROR) goto fail; } @@ -2050,9 +2050,9 @@ static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl put_ldev(mdev); } - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { tl = net_conf_to_tags(mdev, mdev->tconn->net_conf, tl); - put_net_conf(mdev); + put_net_conf(mdev->tconn); } tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e5e7dd1c6dd..8a01f278733 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -578,7 +578,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) int err; int disconnect_on_error = 1; - if (!get_net_conf(mdev)) + if (!get_net_conf(mdev->tconn)) return NULL; what = "sock_create_kern"; @@ -644,7 +644,7 @@ out: if (disconnect_on_error) drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } - put_net_conf(mdev); + put_net_conf(mdev->tconn); return sock; } @@ -654,7 +654,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) struct socket *s_estab = NULL, *s_listen; const char *what; - if (!get_net_conf(mdev)) + if (!get_net_conf(mdev->tconn)) return NULL; what = "sock_create_kern"; @@ -692,7 +692,7 @@ out: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } } - put_net_conf(mdev); + put_net_conf(mdev->tconn); return s_estab; } @@ -3839,7 +3839,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) spin_unlock_irq(&mdev->req_lock); if (os.conn == C_DISCONNECTING) { - wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); + wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) 
== 0); crypto_free_hash(mdev->cram_hmac_tfm); mdev->cram_hmac_tfm = NULL; @@ -4166,9 +4166,9 @@ int drbdd_init(struct drbd_thread *thi) } while (h == 0); if (h > 0) { - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { drbdd(mdev); - put_net_conf(mdev); + put_net_conf(mdev->tconn); } } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 2b2662d4ab3..8f1e7db5e58 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -323,7 +323,7 @@ static int _req_conflicts(struct drbd_request *req) D_ASSERT(drbd_interval_empty(&req->i)); - if (!get_net_conf(mdev)) + if (!get_net_conf(mdev->tconn)) return 0; i = drbd_find_overlap(&mdev->write_requests, sector, size); @@ -359,11 +359,11 @@ static int _req_conflicts(struct drbd_request *req) /* this is like it should be, and what we expected. * our users do behave after all... */ - put_net_conf(mdev); + put_net_conf(mdev->tconn); return 0; out_conflict: - put_net_conf(mdev); + put_net_conf(mdev->tconn); return 1; } @@ -1181,9 +1181,9 @@ void request_timer_fn(unsigned long data) struct list_head *le; unsigned long et = 0; /* effective timeout = ko_count * timeout */ - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { et = mdev->tconn->net_conf->timeout*HZ/10 * mdev->tconn->net_conf->ko_count; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) return; /* Recurring timer stopped */ From e42325a57606396539807ff55c24febda39f8d01 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 13:55:45 +0100 Subject: [PATCH 031/609] drbd: moved data and meta from mdev to tconn Patch mostly: sed -i -e 's/mdev->data/mdev->tconn->data/g' \ -e 's/mdev->meta/mdev->tconn->meta/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 4 +- drivers/block/drbd/drbd_int.h | 17 ++-- drivers/block/drbd/drbd_main.c | 122 ++++++++++++++--------------- 
drivers/block/drbd/drbd_receiver.c | 82 +++++++++---------- drivers/block/drbd/drbd_req.c | 12 +-- drivers/block/drbd/drbd_worker.c | 66 ++++++++-------- 6 files changed, 152 insertions(+), 151 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9284b10e42b..794317778db 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -228,7 +228,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) al_work.enr = enr; al_work.old_enr = al_ext->lc_number; al_work.w.cb = w_al_write_transaction; - drbd_queue_work_front(&mdev->data.work, &al_work.w); + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); wait_for_completion(&al_work.event); mdev->al_writ_cnt++; @@ -717,7 +717,7 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, if (udw) { udw->enr = ext->lce.lc_number; udw->w.cb = w_update_odbm; - drbd_queue_work_front(&mdev->data.work, &udw->w); + drbd_queue_work_front(&mdev->tconn->data.work, &udw->w); } else { dev_warn(DEV, "Could not kmalloc an udw\n"); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fd015502c62..8de17b5bd42 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -969,6 +969,9 @@ struct drbd_tconn { /* is a resource from the config file */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; + + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ + struct drbd_socket meta; /* ping/ack (metadata) packets */ }; struct drbd_conf { @@ -987,8 +990,6 @@ struct drbd_conf { struct block_device *this_bdev; struct gendisk *vdisk; - struct drbd_socket data; /* data/barrier/cstate/parameter packets */ - struct drbd_socket meta; /* ping/ack (metadata) packets */ int agreed_pro_version; /* actually used protocol version */ unsigned long last_received; /* in jiffies, 
either socket */ unsigned int ko_count; @@ -1167,11 +1168,11 @@ static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) */ static inline int drbd_get_data_sock(struct drbd_conf *mdev) { - mutex_lock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()... */ - if (unlikely(mdev->data.socket == NULL)) { - mutex_unlock(&mdev->data.mutex); + if (unlikely(mdev->tconn->data.socket == NULL)) { + mutex_unlock(&mdev->tconn->data.mutex); return 0; } return 1; @@ -1179,7 +1180,7 @@ static inline int drbd_get_data_sock(struct drbd_conf *mdev) static inline void drbd_put_data_sock(struct drbd_conf *mdev) { - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); } /* @@ -2399,7 +2400,7 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } } @@ -2439,7 +2440,7 @@ static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) static inline void drbd_update_congested(struct drbd_conf *mdev) { - struct sock *sk = mdev->data.socket->sk; + struct sock *sk = mdev->tconn->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) set_bit(NET_CONGESTED, &mdev->flags); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9a77a9b950d..84e40fbfd3e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -371,7 +371,7 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) set_bit(CREATE_BARRIER, &mdev->flags); } - drbd_queue_work(&mdev->data.work, &b->w); + drbd_queue_work(&mdev->tconn->data.work, &b->w); } pn = &b->next; } else { @@ -1251,7 +1251,7 @@ __drbd_set_state(struct drbd_conf *mdev, union 
drbd_state ns, ascw->flags = flags; ascw->w.cb = w_after_state_ch; ascw->done = done; - drbd_queue_work(&mdev->data.work, &ascw->w); + drbd_queue_work(&mdev->tconn->data.work, &ascw->w); } else { dev_warn(DEV, "Could not kmalloc an ascw\n"); } @@ -1855,11 +1855,11 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, struct socket *sock; if (use_data_socket) { - mutex_lock(&mdev->data.mutex); - sock = mdev->data.socket; + mutex_lock(&mdev->tconn->data.mutex); + sock = mdev->tconn->data.socket; } else { - mutex_lock(&mdev->meta.mutex); - sock = mdev->meta.socket; + mutex_lock(&mdev->tconn->meta.mutex); + sock = mdev->tconn->meta.socket; } /* drbd_disconnect() could have called drbd_free_sock() @@ -1868,9 +1868,9 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0); if (use_data_socket) - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); else - mutex_unlock(&mdev->meta.mutex); + mutex_unlock(&mdev->tconn->meta.mutex); return ok; } @@ -1888,9 +1888,9 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, return 0; ok = (sizeof(h) == - drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); + drbd_send(mdev, mdev->tconn->data.socket, &h, sizeof(h), 0)); ok = ok && (size == - drbd_send(mdev, mdev->data.socket, data, size, 0)); + drbd_send(mdev, mdev->tconn->data.socket, data, size, 0)); drbd_put_data_sock(mdev); @@ -1913,13 +1913,13 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) /* used from admin command context and receiver/worker context. * to avoid kmalloc, grab the socket right here, * then use the pre-allocated sbuf there */ - mutex_lock(&mdev->data.mutex); - sock = mdev->data.socket; + mutex_lock(&mdev->tconn->data.mutex); + sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { enum drbd_packets cmd = apv >= 89 ? 
P_SYNC_PARAM89 : P_SYNC_PARAM; - p = &mdev->data.sbuf.rs_param_95; + p = &mdev->tconn->data.sbuf.rs_param_95; /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); @@ -1939,7 +1939,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) } else rv = 0; /* not ok */ - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); return rv; } @@ -2106,17 +2106,17 @@ int drbd_send_state(struct drbd_conf *mdev) * of a cluster wide state change on another thread */ drbd_state_lock(mdev); - mutex_lock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ - sock = mdev->data.socket; + sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { ok = _drbd_send_cmd(mdev, sock, P_STATE, (struct p_header80 *)&p, sizeof(p), 0); } - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); drbd_state_unlock(mdev); return ok; @@ -2260,7 +2260,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (len) { DCBP_set_code(p, RLE_VLI_Bits); - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h, + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h, sizeof(*p) + len, 0); c->packets[0]++; @@ -2275,7 +2275,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, len = num_words * sizeof(long); if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -2391,7 +2391,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, p.blksize = blksize; p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); - if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) + if (!mdev->tconn->meta.socket || 
mdev->state.conn < C_CONNECTED) return false; ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, (struct p_header80 *)&p, sizeof(p)); @@ -2473,12 +2473,12 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.head.command = cpu_to_be16(cmd); p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); - mutex_lock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); - ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0)); - ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0)); + ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); + ok = ok && (digest_size == drbd_send(mdev, mdev->tconn->data.socket, digest, digest_size, 0)); - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); return ok; } @@ -2506,7 +2506,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * int drop_it; /* long elapsed = (long)(jiffies - mdev->last_received); */ - drop_it = mdev->meta.socket == sock + drop_it = mdev->tconn->meta.socket == sock || !mdev->asender.task || get_t_state(&mdev->asender) != RUNNING || mdev->state.conn < C_CONNECTED; @@ -2548,7 +2548,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags); + int sent = drbd_send(mdev, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags); kunmap(page); if (sent == size) mdev->send_cnt += size>>9; @@ -2575,12 +2575,12 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, drbd_update_congested(mdev); set_fs(KERNEL_DS); do { - sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, + sent = mdev->tconn->data.socket->ops->sendpage(mdev->tconn->data.socket, page, offset, len, msg_flags); if (sent == -EAGAIN) { if 
(we_should_drop_the_connection(mdev, - mdev->data.socket)) + mdev->tconn->data.socket)) break; else continue; @@ -2699,11 +2699,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == - drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); + drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); - ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) { /* For protocol A, we have to memcpy the payload into @@ -2781,11 +2781,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, if (!drbd_get_data_sock(mdev)) return 0; - ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); + ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); - ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) ok = _drbd_send_zc_ee(mdev, e); @@ -2842,7 +2842,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; - if (sock == mdev->data.socket) { + if (sock == mdev->tconn->data.socket) { mdev->ko_count = mdev->tconn->net_conf->ko_count; drbd_update_congested(mdev); } @@ -2875,13 +2875,13 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, iov.iov_len -= rv; } while (sent < size); - if (sock == mdev->data.socket) + if (sock == mdev->tconn->data.socket) clear_bit(NET_CONGESTED, &mdev->flags); if (rv <= 0) { if (rv != -EAGAIN) { dev_err(DEV, "%s_sendmsg returned %d\n", - sock == mdev->meta.socket ? 
"msock" : "sock", + sock == mdev->tconn->meta.socket ? "msock" : "sock", rv); drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); } else @@ -2980,14 +2980,14 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->ap_in_flight, 0); mutex_init(&mdev->md_io_mutex); - mutex_init(&mdev->data.mutex); - mutex_init(&mdev->meta.mutex); - sema_init(&mdev->data.work.s, 0); - sema_init(&mdev->meta.work.s, 0); + mutex_init(&mdev->tconn->data.mutex); + mutex_init(&mdev->tconn->meta.mutex); + sema_init(&mdev->tconn->data.work.s, 0); + sema_init(&mdev->tconn->meta.work.s, 0); mutex_init(&mdev->state_mutex); - spin_lock_init(&mdev->data.work.q_lock); - spin_lock_init(&mdev->meta.work.q_lock); + spin_lock_init(&mdev->tconn->data.work.q_lock); + spin_lock_init(&mdev->tconn->meta.work.q_lock); spin_lock_init(&mdev->al_lock); spin_lock_init(&mdev->req_lock); @@ -3000,8 +3000,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->read_ee); INIT_LIST_HEAD(&mdev->net_ee); INIT_LIST_HEAD(&mdev->resync_reads); - INIT_LIST_HEAD(&mdev->data.work.q); - INIT_LIST_HEAD(&mdev->meta.work.q); + INIT_LIST_HEAD(&mdev->tconn->data.work.q); + INIT_LIST_HEAD(&mdev->tconn->meta.work.q); INIT_LIST_HEAD(&mdev->resync_work.list); INIT_LIST_HEAD(&mdev->unplug_work.list); INIT_LIST_HEAD(&mdev->go_diskless.list); @@ -3093,8 +3093,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->net_ee)); D_ASSERT(list_empty(&mdev->resync_reads)); - D_ASSERT(list_empty(&mdev->data.work.q)); - D_ASSERT(list_empty(&mdev->meta.work.q)); + D_ASSERT(list_empty(&mdev->tconn->data.work.q)); + D_ASSERT(list_empty(&mdev->tconn->meta.work.q)); D_ASSERT(list_empty(&mdev->resync_work.list)); D_ASSERT(list_empty(&mdev->unplug_work.list)); D_ASSERT(list_empty(&mdev->go_diskless.list)); @@ -3254,7 +3254,7 @@ static void drbd_delete_device(unsigned int minor) /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); - 
D_ASSERT(list_empty(&mdev->data.work.q)); + D_ASSERT(list_empty(&mdev->tconn->data.work.q)); /* end paranoia asserts */ del_gendisk(mdev->vdisk); @@ -3606,19 +3606,19 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) void drbd_free_sock(struct drbd_conf *mdev) { - if (mdev->data.socket) { - mutex_lock(&mdev->data.mutex); - kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); - sock_release(mdev->data.socket); - mdev->data.socket = NULL; - mutex_unlock(&mdev->data.mutex); + if (mdev->tconn->data.socket) { + mutex_lock(&mdev->tconn->data.mutex); + kernel_sock_shutdown(mdev->tconn->data.socket, SHUT_RDWR); + sock_release(mdev->tconn->data.socket); + mdev->tconn->data.socket = NULL; + mutex_unlock(&mdev->tconn->data.mutex); } - if (mdev->meta.socket) { - mutex_lock(&mdev->meta.mutex); - kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); - sock_release(mdev->meta.socket); - mdev->meta.socket = NULL; - mutex_unlock(&mdev->meta.mutex); + if (mdev->tconn->meta.socket) { + mutex_lock(&mdev->tconn->meta.mutex); + kernel_sock_shutdown(mdev->tconn->meta.socket, SHUT_RDWR); + sock_release(mdev->tconn->meta.socket); + mdev->tconn->meta.socket = NULL; + mutex_unlock(&mdev->tconn->meta.mutex); } } @@ -4012,7 +4012,7 @@ void drbd_go_diskless(struct drbd_conf *mdev) { D_ASSERT(mdev->state.disk == D_FAILED); if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) - drbd_queue_work(&mdev->data.work, &mdev->go_diskless); + drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless); } /** @@ -4050,7 +4050,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, set_bit(BITMAP_IO, &mdev->flags); if (atomic_read(&mdev->ap_bio_cnt) == 0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } spin_unlock_irq(&mdev->req_lock); } @@ -4108,7 +4108,7 @@ static void md_sync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; - 
drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work); + drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); } static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8a01f278733..2636bcc173a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -516,7 +516,7 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(KERNEL_DS); for (;;) { - rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); + rv = sock_recvmsg(mdev->tconn->data.socket, &msg, size, msg.msg_flags); if (rv == size) break; @@ -700,14 +700,14 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header80 *h = &mdev->data.sbuf.header.h80; + struct p_header80 *h = &mdev->tconn->data.sbuf.header.h80; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { - struct p_header80 *h = &mdev->data.rbuf.header.h80; + struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); @@ -755,7 +755,7 @@ static int drbd_connect(struct drbd_conf *mdev) struct socket *s, *sock, *msock; int try, h, ok; - D_ASSERT(!mdev->data.socket); + D_ASSERT(!mdev->tconn->data.socket); if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; @@ -870,8 +870,8 @@ retry: drbd_tcp_nodelay(sock); drbd_tcp_nodelay(msock); - mdev->data.socket = sock; - mdev->meta.socket = msock; + mdev->tconn->data.socket = sock; + mdev->tconn->meta.socket = msock; mdev->last_received = jiffies; D_ASSERT(mdev->asender.task == NULL); @@ -925,7 +925,7 @@ out_release_sockets: static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) { - union p_header *h = &mdev->data.rbuf.header; + union 
p_header *h = &mdev->tconn->data.rbuf.header; int r; r = drbd_recv(mdev, h, sizeof(*h)); @@ -1163,7 +1163,7 @@ fail: static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { int rv; - struct p_barrier *p = &mdev->data.rbuf.barrier; + struct p_barrier *p = &mdev->tconn->data.rbuf.barrier; struct drbd_epoch *epoch; inc_unacked(mdev); @@ -1494,7 +1494,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi struct drbd_request *req; sector_t sector; int ok; - struct p_data *p = &mdev->data.rbuf.data; + struct p_data *p = &mdev->tconn->data.rbuf.data; sector = be64_to_cpu(p->sector); @@ -1522,7 +1522,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un { sector_t sector; int ok; - struct p_data *p = &mdev->data.rbuf.data; + struct p_data *p = &mdev->tconn->data.rbuf.data; sector = be64_to_cpu(p->sector); D_ASSERT(p->block_id == ID_SYNCER); @@ -1675,7 +1675,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned { sector_t sector; struct drbd_epoch_entry *e; - struct p_data *p = &mdev->data.rbuf.data; + struct p_data *p = &mdev->tconn->data.rbuf.data; int rw = WRITE; u32 dp_flags; @@ -1964,7 +1964,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un struct digest_info *di = NULL; int size, verb; unsigned int fault_type; - struct p_block_req *p = &mdev->data.rbuf.block_req; + struct p_block_req *p = &mdev->tconn->data.rbuf.block_req; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -2683,7 +2683,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_protocol *p = &mdev->data.rbuf.protocol; + struct p_protocol *p = &mdev->tconn->data.rbuf.protocol; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char 
p_integrity_alg[SHARED_SECRET_MAX] = ""; @@ -2783,7 +2783,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) { int ok = true; - struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; + struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; @@ -2946,7 +2946,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_sizes *p = &mdev->data.rbuf.sizes; + struct p_sizes *p = &mdev->tconn->data.rbuf.sizes; enum determine_dev_size dd = unchanged; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ @@ -3049,7 +3049,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_uuids *p = &mdev->data.rbuf.uuids; + struct p_uuids *p = &mdev->tconn->data.rbuf.uuids; u64 *p_uuid; int i, updated_uuids = 0; @@ -3143,7 +3143,7 @@ static union drbd_state convert_state(union drbd_state ps) static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_req_state *p = &mdev->data.rbuf.req_state; + struct p_req_state *p = &mdev->tconn->data.rbuf.req_state; union drbd_state mask, val; enum drbd_state_rv rv; @@ -3169,7 +3169,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_state *p = &mdev->data.rbuf.state; + struct p_state *p = &mdev->tconn->data.rbuf.state; union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags 
cs_flags; @@ -3321,7 +3321,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid; + struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid; wait_event(mdev->misc_wait, mdev->state.conn == C_WF_SYNC_UUID || @@ -3520,7 +3520,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne void *buffer; int err; int ok = false; - struct p_header80 *h = &mdev->data.rbuf.header.h80; + struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information @@ -3629,14 +3629,14 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ - drbd_tcp_quickack(mdev->data.socket); + drbd_tcp_quickack(mdev->tconn->data.socket); return true; } static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_block_desc *p = &mdev->data.rbuf.block_desc; + struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc; switch (mdev->state.conn) { case C_WF_SYNC_UUID: @@ -3690,15 +3690,15 @@ static struct data_cmd drbd_cmd_handler[] = { }; /* All handler functions that expect a sub-header get that sub-heder in - mdev->data.rbuf.header.head.payload. + mdev->tconn->data.rbuf.header.head.payload. - Usually in mdev->data.rbuf.header.head the callback can find the usual + Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual p_header, but they may not rely on that. Since there is also p_header95 ! 
*/ static void drbdd(struct drbd_conf *mdev) { - union p_header *header = &mdev->data.rbuf.header; + union p_header *header = &mdev->tconn->data.rbuf.header; unsigned int packet_size; enum drbd_packets cmd; size_t shs; /* sub header size */ @@ -3753,7 +3753,7 @@ void drbd_flush_workqueue(struct drbd_conf *mdev) barr.w.cb = w_prev_work_done; init_completion(&barr.done); - drbd_queue_work(&mdev->data.work, &barr.w); + drbd_queue_work(&mdev->tconn->data.work, &barr.w); wait_for_completion(&barr.done); } @@ -3892,25 +3892,25 @@ static void drbd_disconnect(struct drbd_conf *mdev) static int drbd_send_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... */ - struct p_handshake *p = &mdev->data.sbuf.handshake; + struct p_handshake *p = &mdev->tconn->data.sbuf.handshake; int ok; - if (mutex_lock_interruptible(&mdev->data.mutex)) { + if (mutex_lock_interruptible(&mdev->tconn->data.mutex)) { dev_err(DEV, "interrupted during initial handshake\n"); return 0; /* interrupted. not ok. */ } - if (mdev->data.socket == NULL) { - mutex_unlock(&mdev->data.mutex); + if (mdev->tconn->data.socket == NULL) { + mutex_unlock(&mdev->tconn->data.mutex); return 0; } memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, + ok = _drbd_send_cmd( mdev, mdev->tconn->data.socket, P_HAND_SHAKE, (struct p_header80 *)p, sizeof(*p), 0 ); - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); return ok; } @@ -3924,7 +3924,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) static int drbd_do_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... 
*/ - struct p_handshake *p = &mdev->data.rbuf.handshake; + struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); unsigned int length; enum drbd_packets cmd; @@ -4207,7 +4207,7 @@ static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ - mdev->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; + mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) wake_up(&mdev->misc_wait); @@ -4427,7 +4427,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) w = kmalloc(sizeof(*w), GFP_NOIO); if (w) { w->cb = w_ov_finished; - drbd_queue_work_front(&mdev->data.work, w); + drbd_queue_work_front(&mdev->tconn->data.work, w); } else { dev_err(DEV, "kmalloc(w) failed."); ov_oos_print(mdev); @@ -4479,7 +4479,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header80 *h = &mdev->meta.rbuf.header.h80; + struct p_header80 *h = &mdev->tconn->meta.rbuf.header.h80; struct asender_cmd *cmd = NULL; int rv, len; @@ -4501,7 +4501,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "drbd_send_ping has failed\n"); goto reconnect; } - mdev->meta.socket->sk->sk_rcvtimeo = + mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; } @@ -4510,7 +4510,7 @@ int drbd_asender(struct drbd_thread *thi) * it may hurt latency if we cork without much to send */ if (!mdev->tconn->net_conf->no_cork && 3 < atomic_read(&mdev->unacked_cnt)) - drbd_tcp_cork(mdev->meta.socket); + drbd_tcp_cork(mdev->tconn->meta.socket); while (1) { clear_bit(SIGNAL_ASENDER, &mdev->flags); flush_signals(current); @@ -4529,13 +4529,13 @@ int drbd_asender(struct drbd_thread *thi) } /* 
but unconditionally uncork unless disabled */ if (!mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->meta.socket); + drbd_tcp_uncork(mdev->tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ if (signal_pending(current)) continue; - rv = drbd_recv_short(mdev, mdev->meta.socket, + rv = drbd_recv_short(mdev, mdev->tconn->meta.socket, buf, expect-received, 0); clear_bit(SIGNAL_ASENDER, &mdev->flags); @@ -4561,7 +4561,7 @@ int drbd_asender(struct drbd_thread *thi) /* If the data socket received something meanwhile, * that is good enough: peer is still alive. */ if (time_after(mdev->last_received, - jiffies - mdev->meta.socket->sk->sk_rcvtimeo)) + jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo)) continue; if (ping_timeout_active) { dev_err(DEV, "PingAck did not arrive in time.\n"); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8f1e7db5e58..ac43e440d66 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -127,7 +127,7 @@ static void queue_barrier(struct drbd_conf *mdev) * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in tl_clear. */ inc_ap_pending(mdev); - drbd_queue_work(&mdev->data.work, &b->w); + drbd_queue_work(&mdev->tconn->data.work, &b->w); set_bit(CREATE_BARRIER, &mdev->flags); } @@ -483,7 +483,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->w.cb = (req->rq_state & RQ_LOCAL_MASK) ? 
w_read_retry_remote : w_send_read_req; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; case QUEUE_FOR_NET_WRITE: @@ -525,7 +525,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_dblock; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ if (mdev->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) @@ -536,7 +536,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case QUEUE_FOR_SEND_OOS: req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_oos; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; case OOS_HANDED_TO_NETWORK: @@ -667,7 +667,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, get_ldev(mdev); req->w.cb = w_restart_disk_io; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; case RESEND: @@ -677,7 +677,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, We ensure that the peer was not rebooted */ if (!(req->rq_state & RQ_NET_OK)) { if (req->w.cb) { - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); rv = req->rq_state & RQ_WRITE ? 
MR_WRITE : MR_READ; } break; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d8c61816d10..9b1e2bad5fb 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -94,7 +94,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->req_lock, flags); - drbd_queue_work(&mdev->data.work, &e->w); + drbd_queue_work(&mdev->tconn->data.work, &e->w); put_ldev(mdev); } @@ -400,7 +400,7 @@ void resync_timer_fn(unsigned long data) struct drbd_conf *mdev = (struct drbd_conf *) data; if (list_empty(&mdev->resync_work.list)) - drbd_queue_work(&mdev->data.work, &mdev->resync_work); + drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work); } static void fifo_set(struct fifo_buffer *fb, int value) @@ -538,15 +538,15 @@ static int w_make_resync_request(struct drbd_conf *mdev, for (i = 0; i < number; i++) { /* Stop generating RS requests, when half of the send buffer is filled */ - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket) { - queued = mdev->data.socket->sk->sk_wmem_queued; - sndbuf = mdev->data.socket->sk->sk_sndbuf; + mutex_lock(&mdev->tconn->data.mutex); + if (mdev->tconn->data.socket) { + queued = mdev->tconn->data.socket->sk->sk_wmem_queued; + sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf; } else { queued = 1; sndbuf = 0; } - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); if (queued > sndbuf / 2) goto requeue; @@ -710,7 +710,7 @@ void start_resync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; - drbd_queue_work(&mdev->data.work, &mdev->start_resync_work); + drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); } int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) @@ -775,7 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); if (w) { w->cb = 
w_resync_finished; - drbd_queue_work(&mdev->data.work, w); + drbd_queue_work(&mdev->tconn->data.work, w); return 1; } dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); @@ -1202,7 +1202,7 @@ int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); - struct p_barrier *p = &mdev->data.sbuf.barrier; + struct p_barrier *p = &mdev->tconn->data.sbuf.barrier; int ok = 1; /* really avoid racing with tl_clear. w.cb may have been referenced @@ -1223,7 +1223,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. */ - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, (struct p_header80 *)p, sizeof(*p), 0); drbd_put_data_sock(mdev); @@ -1621,18 +1621,18 @@ int drbd_worker(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); - if (down_trylock(&mdev->data.work.s)) { - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->data.socket); - mutex_unlock(&mdev->data.mutex); + if (down_trylock(&mdev->tconn->data.work.s)) { + mutex_lock(&mdev->tconn->data.mutex); + if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) + drbd_tcp_uncork(mdev->tconn->data.socket); + mutex_unlock(&mdev->tconn->data.mutex); - intr = down_interruptible(&mdev->data.work.s); + intr = down_interruptible(&mdev->tconn->data.work.s); - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_cork(mdev->data.socket); - mutex_unlock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); + if (mdev->tconn->data.socket && 
!mdev->tconn->net_conf->no_cork) + drbd_tcp_cork(mdev->tconn->data.socket); + mutex_unlock(&mdev->tconn->data.mutex); } if (intr) { @@ -1650,8 +1650,8 @@ int drbd_worker(struct drbd_thread *thi) this... */ w = NULL; - spin_lock_irq(&mdev->data.work.q_lock); - if (!expect(!list_empty(&mdev->data.work.q))) { + spin_lock_irq(&mdev->tconn->data.work.q_lock); + if (!expect(!list_empty(&mdev->tconn->data.work.q))) { /* something terribly wrong in our logic. * we were able to down() the semaphore, * but the list is empty... doh. @@ -1663,12 +1663,12 @@ int drbd_worker(struct drbd_thread *thi) * * I'll try to get away just starting over this loop. */ - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); continue; } - w = list_entry(mdev->data.work.q.next, struct drbd_work, list); + w = list_entry(mdev->tconn->data.work.q.next, struct drbd_work, list); list_del_init(&w->list); - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { /* dev_warn(DEV, "worker: a callback failed! 
\n"); */ @@ -1680,11 +1680,11 @@ int drbd_worker(struct drbd_thread *thi) D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); - spin_lock_irq(&mdev->data.work.q_lock); + spin_lock_irq(&mdev->tconn->data.work.q_lock); i = 0; - while (!list_empty(&mdev->data.work.q)) { - list_splice_init(&mdev->data.work.q, &work_list); - spin_unlock_irq(&mdev->data.work.q_lock); + while (!list_empty(&mdev->tconn->data.work.q)) { + list_splice_init(&mdev->tconn->data.work.q, &work_list); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); while (!list_empty(&work_list)) { w = list_entry(work_list.next, struct drbd_work, list); @@ -1693,15 +1693,15 @@ int drbd_worker(struct drbd_thread *thi) i++; /* dead debugging code */ } - spin_lock_irq(&mdev->data.work.q_lock); + spin_lock_irq(&mdev->tconn->data.work.q_lock); } - sema_init(&mdev->data.work.s, 0); + sema_init(&mdev->tconn->data.work.s, 0); /* DANGEROUS race: if someone did queue his work within the spinlock, * but up() ed outside the spinlock, we could get an up() on the * semaphore without corresponding list entry. * So don't do that. */ - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. 
From e6b3ea83bc72e126247b241c1164794a644d6fdc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:02:01 +0100 Subject: [PATCH 032/609] drbd: moved receiver, worker and asender from mdev to tconn Patch mostly: sed -i -e 's/mdev->receiver/mdev->tconn->receiver/g' \ -e 's/mdev->worker/mdev->tconn->worker/g' \ -e 's/mdev->asender/mdev->tconn->asender/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 24 ++++++++-------- drivers/block/drbd/drbd_int.h | 9 +++--- drivers/block/drbd/drbd_main.c | 44 +++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 10 +++---- drivers/block/drbd/drbd_receiver.c | 14 +++++----- drivers/block/drbd/drbd_worker.c | 4 +-- 6 files changed, 53 insertions(+), 52 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index c756b4dbd13..4da4c322fa5 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,13 +119,13 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - current == mdev->receiver.task ? "receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : current->comm, + current == mdev->tconn->receiver.task ? "receiver" : + current == mdev->tconn->asender.task ? "asender" : + current == mdev->tconn->worker.task ? "worker" : current->comm, func, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); + b->bm_task == mdev->tconn->receiver.task ? "receiver" : + b->bm_task == mdev->tconn->asender.task ? "asender" : + b->bm_task == mdev->tconn->worker.task ? 
"worker" : "?"); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -142,13 +142,13 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - current == mdev->receiver.task ? "receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : current->comm, + current == mdev->tconn->receiver.task ? "receiver" : + current == mdev->tconn->asender.task ? "asender" : + current == mdev->tconn->worker.task ? "worker" : current->comm, why, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); + b->bm_task == mdev->tconn->receiver.task ? "receiver" : + b->bm_task == mdev->tconn->asender.task ? "asender" : + b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8de17b5bd42..c5b1167aab5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -972,6 +972,10 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ + + struct drbd_thread receiver; + struct drbd_thread worker; + struct drbd_thread asender; }; struct drbd_conf { @@ -1068,9 +1072,6 @@ struct drbd_conf { struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; - struct drbd_thread receiver; - struct drbd_thread worker; - struct drbd_thread asender; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -2005,7 +2006,7 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) static inline void wake_asender(struct drbd_conf *mdev) { if (test_bit(SIGNAL_ASENDER, 
&mdev->flags)) - force_sig(DRBD_SIG, mdev->asender.task); + force_sig(DRBD_SIG, mdev->tconn->asender.task); } static inline void request_ping(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 84e40fbfd3e..5d8a6e94a4a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -613,7 +613,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_unlock_irqrestore(&mdev->req_lock, flags); if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->worker.task); + D_ASSERT(current != mdev->tconn->worker.task); wait_for_completion(&done); } @@ -1229,16 +1229,16 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, /* Receiver should clean up itself */ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&mdev->receiver); + drbd_thread_stop_nowait(&mdev->tconn->receiver); /* Now the receiver finished cleaning up itself, it should die */ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&mdev->receiver); + drbd_thread_stop_nowait(&mdev->tconn->receiver); /* Upon network failure, we need to restart the receiver. 
*/ if (os.conn > C_TEAR_DOWN && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&mdev->receiver); + drbd_thread_restart_nowait(&mdev->tconn->receiver); /* Resume AL writing if we get a connection */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) @@ -1297,7 +1297,7 @@ int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, { int rv; - D_ASSERT(current == mdev->worker.task); + D_ASSERT(current == mdev->tconn->worker.task); /* open coded non-blocking drbd_suspend_io(mdev); */ set_bit(SUSPEND_IO, &mdev->flags); @@ -1598,7 +1598,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Upon network connection, we need to start the receiver */ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&mdev->receiver); + drbd_thread_start(&mdev->tconn->receiver); /* Terminate worker thread if we are unconfigured - it will be restarted as needed... */ @@ -1609,7 +1609,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); /* set in __drbd_set_state, unless CONFIG_PENDING was set */ if (test_bit(DEVICE_DYING, &mdev->flags)) - drbd_thread_stop_nowait(&mdev->worker); + drbd_thread_stop_nowait(&mdev->tconn->worker); } drbd_md_sync(mdev); @@ -1675,9 +1675,9 @@ int drbd_thread_start(struct drbd_thread *thi) unsigned long flags; const char *me = - thi == &mdev->receiver ? "receiver" : - thi == &mdev->asender ? "asender" : - thi == &mdev->worker ? "worker" : "NONSENSE"; + thi == &mdev->tconn->receiver ? "receiver" : + thi == &mdev->tconn->asender ? "asender" : + thi == &mdev->tconn->worker ? "worker" : "NONSENSE"; /* is used from state engine doing drbd_thread_stop_nowait, * while holding the req lock irqsave */ @@ -1807,9 +1807,9 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { struct task_struct *p = current; struct drbd_thread *thi = - p == mdev->asender.task ? &mdev->asender : - p == mdev->receiver.task ? 
&mdev->receiver : - p == mdev->worker.task ? &mdev->worker : + p == mdev->tconn->asender.task ? &mdev->tconn->asender : + p == mdev->tconn->receiver.task ? &mdev->tconn->receiver : + p == mdev->tconn->worker.task ? &mdev->tconn->worker : NULL; if (!expect(thi != NULL)) return; @@ -2507,8 +2507,8 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * /* long elapsed = (long)(jiffies - mdev->last_received); */ drop_it = mdev->tconn->meta.socket == sock - || !mdev->asender.task - || get_t_state(&mdev->asender) != RUNNING + || !mdev->tconn->asender.task + || get_t_state(&mdev->tconn->asender) != RUNNING || mdev->state.conn < C_CONNECTED; if (drop_it) @@ -3034,9 +3034,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->receiver, drbdd_init); - drbd_thread_init(mdev, &mdev->worker, drbd_worker); - drbd_thread_init(mdev, &mdev->asender, drbd_asender); + drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init); + drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); + drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); mdev->agreed_pro_version = PRO_VERSION_MAX; mdev->write_ordering = WO_bdev_flush; @@ -3048,9 +3048,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) void drbd_mdev_cleanup(struct drbd_conf *mdev) { int i; - if (mdev->receiver.t_state != NONE) + if (mdev->tconn->receiver.t_state != NONE) dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", - mdev->receiver.t_state); + mdev->tconn->receiver.t_state); /* no need to lock it, I'm the only thread alive */ if (atomic_read(&mdev->current_epoch->epoch_size) != 0) @@ -4032,7 +4032,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, void (*done)(struct drbd_conf *, int), char *why, enum bm_flag flags) { - D_ASSERT(current == mdev->worker.task); + D_ASSERT(current == mdev->tconn->worker.task); D_ASSERT(!test_bit(BITMAP_IO_QUEUED, 
&mdev->flags)); D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); @@ -4069,7 +4069,7 @@ int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), { int rv; - D_ASSERT(current != mdev->worker.task); + D_ASSERT(current != mdev->tconn->worker.task); if ((flags & BM_LOCKED_SET_ALLOWED) == 0) drbd_suspend_io(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a936d61a90c..59bb58c9b22 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -875,7 +875,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) { wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); - drbd_thread_start(&mdev->worker); + drbd_thread_start(&mdev->tconn->worker); drbd_flush_workqueue(mdev); } @@ -889,7 +889,7 @@ static void drbd_reconfig_done(struct drbd_conf *mdev) mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { set_bit(DEVICE_DYING, &mdev->flags); - drbd_thread_stop_nowait(&mdev->worker); + drbd_thread_stop_nowait(&mdev->tconn->worker); } else clear_bit(CONFIG_PENDING, &mdev->flags); spin_unlock_irq(&mdev->req_lock); @@ -1887,9 +1887,9 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { cpumask_copy(mdev->cpu_mask, new_cpu_mask); drbd_calc_cpu_mask(mdev); - mdev->receiver.reset_cpu_mask = 1; - mdev->asender.reset_cpu_mask = 1; - mdev->worker.reset_cpu_mask = 1; + mdev->tconn->receiver.reset_cpu_mask = 1; + mdev->tconn->asender.reset_cpu_mask = 1; + mdev->tconn->worker.reset_cpu_mask = 1; } kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2636bcc173a..e9f670cd554 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -833,7 +833,7 @@ retry: if (signal_pending(current)) { 
flush_signals(current); smp_rmb(); - if (get_t_state(&mdev->receiver) == EXITING) + if (get_t_state(&mdev->tconn->receiver) == EXITING) goto out_release_sockets; } @@ -874,7 +874,7 @@ retry: mdev->tconn->meta.socket = msock; mdev->last_received = jiffies; - D_ASSERT(mdev->asender.task == NULL); + D_ASSERT(mdev->tconn->asender.task == NULL); h = drbd_do_handshake(mdev); if (h <= 0) @@ -901,7 +901,7 @@ retry: atomic_set(&mdev->packet_seq, 0); mdev->peer_seq = 0; - drbd_thread_start(&mdev->asender); + drbd_thread_start(&mdev->tconn->asender); if (drbd_send_protocol(mdev) == -1) return -1; @@ -3704,7 +3704,7 @@ static void drbdd(struct drbd_conf *mdev) size_t shs; /* sub header size */ int rv; - while (get_t_state(&mdev->receiver) == RUNNING) { + while (get_t_state(&mdev->tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (!drbd_recv_header(mdev, &cmd, &packet_size)) goto err_out; @@ -3768,7 +3768,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) return; /* asender does not clean up anything. it must not interfere, either */ - drbd_thread_stop(&mdev->asender); + drbd_thread_stop(&mdev->tconn->asender); drbd_free_sock(mdev); /* wait for current activity to cease. */ @@ -3891,7 +3891,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) */ static int drbd_send_handshake(struct drbd_conf *mdev) { - /* ASSERT current == mdev->receiver ... */ + /* ASSERT current == mdev->tconn->receiver ... */ struct p_handshake *p = &mdev->tconn->data.sbuf.handshake; int ok; @@ -3923,7 +3923,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) */ static int drbd_do_handshake(struct drbd_conf *mdev) { - /* ASSERT current == mdev->receiver ... */ + /* ASSERT current == mdev->tconn->receiver ... 
*/ struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); unsigned int length; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9b1e2bad5fb..1ca7856f813 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1705,8 +1705,8 @@ int drbd_worker(struct drbd_thread *thi) D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. - * wait here for the EXITING receiver. */ - drbd_thread_stop(&mdev->receiver); + * wait here for the exiting receiver. */ + drbd_thread_stop(&mdev->tconn->receiver); drbd_mdev_cleanup(mdev); dev_info(DEV, "worker terminated\n"); From 31890f4ab299c4116cf0a104ca9ce4f9ca2c5da0 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:12:51 +0100 Subject: [PATCH 033/609] drbd: moved agreed_pro_version, last_received and ko_count to tconn sed -i \ -e 's/mdev->agreed_pro_version/mdev->tconn->agreed_pro_version/g' \ -e 's/mdev->last_received/mdev->tconn->last_received/g' \ -e 's/mdev->ko_count/mdev->tconn->ko_count/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++--- drivers/block/drbd/drbd_main.c | 32 ++++++++++++------------ drivers/block/drbd/drbd_nl.c | 8 +++--- drivers/block/drbd/drbd_receiver.c | 40 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_worker.c | 6 ++--- 6 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c5b1167aab5..9efe499b112 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -972,6 +972,9 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ + int 
agreed_pro_version; /* actually used protocol version */ + unsigned long last_received; /* in jiffies, either socket */ + unsigned int ko_count; struct drbd_thread receiver; struct drbd_thread worker; @@ -994,9 +997,6 @@ struct drbd_conf { struct block_device *this_bdev; struct gendisk *vdisk; - int agreed_pro_version; /* actually used protocol version */ - unsigned long last_received; /* in jiffies, either socket */ - unsigned int ko_count; struct drbd_work resync_work, unplug_work, go_diskless, @@ -2297,7 +2297,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) /* Allow IO in BM exchange states with new protocols */ case C_WF_BITMAP_S: - if (mdev->agreed_pro_version < 96) + if (mdev->tconn->agreed_pro_version < 96) return 0; break; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5d8a6e94a4a..e06ca4a0d90 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -735,7 +735,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - mdev->agreed_pro_version < 88) + mdev->tconn->agreed_pro_version < 88) rv = SS_NOT_SUPPORTED; else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) @@ -993,7 +993,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* helper for __drbd_set_state */ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) { - if (mdev->agreed_pro_version < 90) + if (mdev->tconn->agreed_pro_version < 90) mdev->ov_start_sector = 0; mdev->rs_total = drbd_bm_bits(mdev); mdev->ov_position = 0; @@ -1393,7 +1393,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * which is unexpected. 
*/ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && - mdev->agreed_pro_version >= 96 && get_ldev(mdev)) { + mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { drbd_gen_and_send_sync_uuid(mdev); put_ldev(mdev); } @@ -1902,7 +1902,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) struct p_rs_param_95 *p; struct socket *sock; int size, rv; - const int apv = mdev->agreed_pro_version; + const int apv = mdev->tconn->agreed_pro_version; size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) @@ -1951,7 +1951,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) size = sizeof(struct p_protocol); - if (mdev->agreed_pro_version >= 87) + if (mdev->tconn->agreed_pro_version >= 87) size += strlen(mdev->tconn->net_conf->integrity_alg) + 1; /* we must not recurse into our own queue, @@ -1970,7 +1970,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (mdev->tconn->net_conf->want_lose) cf |= CF_WANT_LOSE; if (mdev->tconn->net_conf->dry_run) { - if (mdev->agreed_pro_version >= 92) + if (mdev->tconn->agreed_pro_version >= 92) cf |= CF_DRY_RUN; else { dev_err(DEV, "--dry-run is not supported by peer"); @@ -1980,7 +1980,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) } p->conn_flags = cpu_to_be32(cf); - if (mdev->agreed_pro_version >= 87) + if (mdev->tconn->agreed_pro_version >= 87) strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, @@ -2158,7 +2158,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* may we use this feature? 
*/ if ((mdev->sync_conf.use_rle == 0) || - (mdev->agreed_pro_version < 90)) + (mdev->tconn->agreed_pro_version < 90)) return 0; if (c->bit_offset >= c->bm_bits) @@ -2404,7 +2404,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp, int data_size) { - data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); @@ -2514,10 +2514,10 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * if (drop_it) return true; - drop_it = !--mdev->ko_count; + drop_it = !--mdev->tconn->ko_count; if (!drop_it) { dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", - current->comm, current->pid, mdev->ko_count); + current->comm, current->pid, mdev->tconn->ko_count); request_ping(mdev); } @@ -2647,7 +2647,7 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) { - if (mdev->agreed_pro_version >= 95) + if (mdev->tconn->agreed_pro_version >= 95) return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) | (bi_rw & REQ_FUA ? DP_FUA : 0) | (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) | @@ -2670,7 +2670,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) if (!drbd_get_data_sock(mdev)) return 0; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { @@ -2755,7 +2755,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, void *dgb; int dgs; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 
+ dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { @@ -2843,7 +2843,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (sock == mdev->tconn->data.socket) { - mdev->ko_count = mdev->tconn->net_conf->ko_count; + mdev->tconn->ko_count = mdev->tconn->net_conf->ko_count; drbd_update_congested(mdev); } do { @@ -3038,7 +3038,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); - mdev->agreed_pro_version = PRO_VERSION_MAX; + mdev->tconn->agreed_pro_version = PRO_VERSION_MAX; mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 59bb58c9b22..a9ede8fc888 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -845,9 +845,9 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) Because new from 8.3.8 onwards the peer can use multiple BIOs for a single peer_request */ if (mdev->state.conn >= C_CONNECTED) { - if (mdev->agreed_pro_version < 94) + if (mdev->tconn->agreed_pro_version < 94) peer = mdev->peer_max_bio_size; - else if (mdev->agreed_pro_version == 94) + else if (mdev->tconn->agreed_pro_version == 94) peer = DRBD_MAX_SIZE_H80_PACKET; else /* drbd 8.3.8 onwards */ peer = DRBD_MAX_BIO_SIZE; @@ -1675,7 +1675,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (rs.no_resync && mdev->agreed_pro_version < 93) { + if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) { retcode = ERR_NEED_APV_93; goto fail; } @@ -2170,7 +2170,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } /* this is "skip initial sync", assume to be 
clean */ - if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && + if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 && mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { dev_info(DEV, "Preparing to skip initial sync\n"); skip_initial_sync = 1; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e9f670cd554..27a8363510d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -872,7 +872,7 @@ retry: mdev->tconn->data.socket = sock; mdev->tconn->meta.socket = msock; - mdev->last_received = jiffies; + mdev->tconn->last_received = jiffies; D_ASSERT(mdev->tconn->asender.task == NULL); @@ -948,7 +948,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi be16_to_cpu(h->h80.length)); return false; } - mdev->last_received = jiffies; + mdev->tconn->last_received = jiffies; return true; } @@ -1244,7 +1244,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ void *dig_vv = mdev->int_dig_vv; unsigned long *data; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; if (dgs) { @@ -1361,7 +1361,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, void *dig_in = mdev->int_dig_in; void *dig_vv = mdev->int_dig_vv; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? 
crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; if (dgs) { @@ -2048,7 +2048,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un goto out_free_e; if (cmd == P_CSUM_RS_REQUEST) { - D_ASSERT(mdev->agreed_pro_version >= 89); + D_ASSERT(mdev->tconn->agreed_pro_version >= 89); e->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); @@ -2065,7 +2065,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un case P_OV_REQUEST: if (mdev->ov_start_sector == ~(sector_t)0 && - mdev->agreed_pro_version >= 90) { + mdev->tconn->agreed_pro_version >= 90) { unsigned long now = jiffies; int i; mdev->ov_start_sector = sector; @@ -2360,7 +2360,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && @@ -2381,7 +2381,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && @@ -2427,14 +2427,14 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l *rule_nr = 51; peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (mdev->agreed_pro_version < 96 ? + if (mdev->tconn->agreed_pro_version < 96 ? 
(mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of the peer's UUIDs. */ - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; @@ -2464,14 +2464,14 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l *rule_nr = 71; self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (mdev->agreed_pro_version < 96 ? + if (mdev->tconn->agreed_pro_version < 96 ? (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) : self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of our UUIDs. 
*/ - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); @@ -2731,7 +2731,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig goto disconnect; } - if (mdev->agreed_pro_version >= 87) { + if (mdev->tconn->agreed_pro_version >= 87) { unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg; if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) @@ -2787,7 +2787,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; - const int apv = mdev->agreed_pro_version; + const int apv = mdev->tconn->agreed_pro_version; int *rs_plan_s = NULL; int fifo_size = 0; @@ -3074,7 +3074,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (get_ldev(mdev)) { int skip_initial_sync = mdev->state.conn == C_CONNECTED && - mdev->agreed_pro_version >= 90 && + mdev->tconn->agreed_pro_version >= 90 && mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && (p_uuid[UI_FLAGS] & 8); if (skip_initial_sync) { @@ -3967,10 +3967,10 @@ static int drbd_do_handshake(struct drbd_conf *mdev) PRO_VERSION_MIN > p->protocol_max) goto incompat; - mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); + mdev->tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); dev_info(DEV, "Handshake successful: " - "Agreed network protocol version %d\n", mdev->agreed_pro_version); + "Agreed network protocol version %d\n", mdev->tconn->agreed_pro_version); return 1; @@ -4220,7 +4220,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); - D_ASSERT(mdev->agreed_pro_version >= 89); + D_ASSERT(mdev->tconn->agreed_pro_version >= 89); 
update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4560,7 +4560,7 @@ int drbd_asender(struct drbd_thread *thi) } else if (rv == -EAGAIN) { /* If the data socket received something meanwhile, * that is good enough: peer is still alive. */ - if (time_after(mdev->last_received, + if (time_after(mdev->tconn->last_received, jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo)) continue; if (ping_timeout_active) { @@ -4598,7 +4598,7 @@ int drbd_asender(struct drbd_thread *thi) goto reconnect; } if (received == expect) { - mdev->last_received = jiffies; + mdev->tconn->last_received = jiffies; D_ASSERT(cmd != NULL); if (!cmd->process(mdev, h)) goto reconnect; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ac43e440d66..c871ef2414f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -951,7 +951,7 @@ allocate_barrier: _req_mod(req, QUEUE_FOR_SEND_OOS); if (remote && - mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { + mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) { int congested = 0; if (mdev->tconn->net_conf->cong_fill && diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1ca7856f813..ec26df37884 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -616,7 +616,7 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) { + if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) { switch (read_for_csum(mdev, sector, size)) { case -EIO: /* Disk failure */ put_ldev(mdev); @@ -1574,10 +1574,10 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) * drbd_resync_finished from here in that case. * We drbd_gen_and_send_sync_uuid here for protocol < 96, * and from after_state_ch otherwise. 
*/ - if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96) + if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96) drbd_gen_and_send_sync_uuid(mdev); - if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) { + if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) { /* This still has a race (about when exactly the peers * detect connection loss) that can lead to a full sync * on next handshake. In 8.3.9 we fixed this with explicit From 87eeee41f8740451b61a1e7d37a494333a906861 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:16:30 +0100 Subject: [PATCH 034/609] drbd: moved req_lock and transfer log from mdev to tconn sed -i \ -e 's/mdev->req_lock/mdev->tconn->req_lock/g' \ -e 's/mdev->unused_spare_tle/mdev->tconn->unused_spare_tle/g' \ -e 's/mdev->newest_tle/mdev->tconn->newest_tle/g' \ -e 's/mdev->oldest_tle/mdev->tconn->oldest_tle/g' \ -e 's/mdev->out_of_sequence_requests/mdev->tconn->out_of_sequence_requests/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 20 +++--- drivers/block/drbd/drbd_main.c | 100 ++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 34 +++++----- drivers/block/drbd/drbd_receiver.c | 96 +++++++++++++-------------- drivers/block/drbd/drbd_req.c | 48 +++++++------- drivers/block/drbd/drbd_req.h | 4 +- drivers/block/drbd/drbd_worker.c | 38 +++++------ 7 files changed, 170 insertions(+), 170 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9efe499b112..b440ffd1498 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -976,6 +976,12 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned long last_received; /* in jiffies, either socket */ unsigned int ko_count; + spinlock_t req_lock; + struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ + struct drbd_tl_epoch *newest_tle; + struct drbd_tl_epoch *oldest_tle; + struct 
list_head out_of_sequence_requests; + struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -1031,12 +1037,6 @@ struct drbd_conf { atomic_t unacked_cnt; /* Need to send replys for */ atomic_t local_cnt; /* Waiting for local completion */ - spinlock_t req_lock; - struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ - struct drbd_tl_epoch *newest_tle; - struct drbd_tl_epoch *oldest_tle; - struct list_head out_of_sequence_requests; - /* Interval tree of pending local requests */ struct rb_root read_requests; struct rb_root write_requests; @@ -1868,9 +1868,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf *mdev, { if (error) { unsigned long flags; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); __drbd_chk_io_error_(mdev, forcedetach, where); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); } } @@ -2366,11 +2366,11 @@ static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count) { bool rv = false; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); rv = may_inc_ap_bio(mdev); if (rv) atomic_add(count, &mdev->ap_bio_cnt); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return rv; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e06ca4a0d90..c063cd51322 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -185,7 +185,7 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) * DOC: The transfer log * * The transfer log is a single linked list of &struct drbd_tl_epoch objects. - * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail + * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail * of the list. There is always at least one &struct drbd_tl_epoch object. 
* * Each &struct drbd_tl_epoch has a circular double linked list of requests @@ -206,21 +206,21 @@ static int tl_init(struct drbd_conf *mdev) b->n_writes = 0; b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - mdev->oldest_tle = b; - mdev->newest_tle = b; - INIT_LIST_HEAD(&mdev->out_of_sequence_requests); + mdev->tconn->oldest_tle = b; + mdev->tconn->newest_tle = b; + INIT_LIST_HEAD(&mdev->tconn->out_of_sequence_requests); return 1; } static void tl_cleanup(struct drbd_conf *mdev) { - D_ASSERT(mdev->oldest_tle == mdev->newest_tle); - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); - kfree(mdev->oldest_tle); - mdev->oldest_tle = NULL; - kfree(mdev->unused_spare_tle); - mdev->unused_spare_tle = NULL; + D_ASSERT(mdev->tconn->oldest_tle == mdev->tconn->newest_tle); + D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); + kfree(mdev->tconn->oldest_tle); + mdev->tconn->oldest_tle = NULL; + kfree(mdev->tconn->unused_spare_tle); + mdev->tconn->unused_spare_tle = NULL; } /** @@ -240,13 +240,13 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) new->next = NULL; new->n_writes = 0; - newest_before = mdev->newest_tle; + newest_before = mdev->tconn->newest_tle; /* never send a barrier number == 0, because that is special-cased * when using TCQ for our write ordering code */ new->br_number = (newest_before->br_number+1) ?: 1; - if (mdev->newest_tle != new) { - mdev->newest_tle->next = new; - mdev->newest_tle = new; + if (mdev->tconn->newest_tle != new) { + mdev->tconn->newest_tle->next = new; + mdev->tconn->newest_tle = new; } } @@ -267,9 +267,9 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); - b = mdev->oldest_tle; + b = mdev->tconn->oldest_tle; /* first some paranoia code */ if (b == NULL) { @@ -312,22 +312,22 @@ void tl_release(struct drbd_conf *mdev, unsigned 
int barrier_nr, if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { _tl_add_barrier(mdev, b); if (nob) - mdev->oldest_tle = nob; + mdev->tconn->oldest_tle = nob; /* if nob == NULL b was the only barrier, and becomes the new - barrier. Therefore mdev->oldest_tle points already to b */ + barrier. Therefore mdev->tconn->oldest_tle points already to b */ } else { D_ASSERT(nob != NULL); - mdev->oldest_tle = nob; + mdev->tconn->oldest_tle = nob; kfree(b); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); dec_ap_pending(mdev); return; bail: - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); } @@ -347,8 +347,8 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) struct drbd_request *req; int rv, n_writes, n_reads; - b = mdev->oldest_tle; - pn = &mdev->oldest_tle; + b = mdev->tconn->oldest_tle; + pn = &mdev->tconn->oldest_tle; while (b) { n_writes = 0; n_reads = 0; @@ -387,7 +387,7 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) if (b->w.cb != NULL) dec_ap_pending(mdev); - if (b == mdev->newest_tle) { + if (b == mdev->tconn->newest_tle) { /* recycle, but reinit! */ D_ASSERT(tmp == NULL); INIT_LIST_HEAD(&b->requests); @@ -422,15 +422,15 @@ void tl_clear(struct drbd_conf *mdev) struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _tl_restart(mdev, CONNECTION_LOST_WHILE_PENDING); /* we expect this list to be empty. */ - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); + D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); /* but just in case, clean it up anyways! */ - list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) { + list_for_each_safe(le, tle, &mdev->tconn->out_of_sequence_requests) { r = list_entry(le, struct drbd_request, tl_requests); /* It would be nice to complete outside of spinlock. 
* But this is easier for now. */ @@ -440,14 +440,14 @@ void tl_clear(struct drbd_conf *mdev) /* ensure bit indicating barrier is required is clear */ clear_bit(CREATE_BARRIER, &mdev->flags); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _tl_restart(mdev, what); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /** @@ -476,12 +476,12 @@ drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, union drbd_state os, ns; enum drbd_state_rv rv; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; rv = _drbd_set_state(mdev, ns, f, NULL); ns = mdev->state; - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); return rv; } @@ -522,7 +522,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, return SS_CW_FAILED_BY_PEER; rv = 0; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; ns = sanitize_state(mdev, os, ns, NULL); @@ -537,7 +537,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. 
*/ } } - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); return rv; } @@ -566,7 +566,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, if (f & CS_SERIALIZE) mutex_lock(&mdev->state_mutex); - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; ns = sanitize_state(mdev, os, ns, NULL); @@ -575,7 +575,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) rv = is_valid_state_transition(mdev, ns, os); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (rv < SS_SUCCESS) { if (f & CS_VERBOSE) @@ -601,7 +601,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, print_st_err(mdev, os, ns, rv); goto abort; } - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; rv = _drbd_set_state(mdev, ns, f, &done); @@ -610,7 +610,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, rv = _drbd_set_state(mdev, ns, f, &done); } - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { D_ASSERT(current != mdev->tconn->worker.task); @@ -1367,9 +1367,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /* case2: The connection was established again: */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { @@ -1380,11 +1380,11 @@ static void after_state_ch(struct drbd_conf *mdev, union 
drbd_state os, } if (what != NOTHING) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _tl_restart(mdev, what); nsm.i &= mdev->state.i; _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /* Became sync source. With protocol >= 96, we still need to send out @@ -2898,7 +2898,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) int rv = 0; mutex_lock(&drbd_main_mutex); - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); /* to have a stable mdev->state.role * and no race with updating open_cnt */ @@ -2911,7 +2911,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) if (!rv) mdev->open_cnt++; - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); mutex_unlock(&drbd_main_mutex); return rv; @@ -2990,7 +2990,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) spin_lock_init(&mdev->tconn->meta.work.q_lock); spin_lock_init(&mdev->al_lock); - spin_lock_init(&mdev->req_lock); + spin_lock_init(&mdev->tconn->req_lock); spin_lock_init(&mdev->peer_seq_lock); spin_lock_init(&mdev->epoch_lock); @@ -3451,7 +3451,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); - q->queue_lock = &mdev->req_lock; + q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */ mdev->md_io_page = alloc_page(GFP_KERNEL); if (!mdev->md_io_page) @@ -3784,14 +3784,14 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) { int peer; peer = 
be32_to_cpu(buffer->la_peer_max_bio_size); peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); mdev->peer_max_bio_size = peer; } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (mdev->sync_conf.al_extents < 7) mdev->sync_conf.al_extents = 127; @@ -4046,13 +4046,13 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, mdev->bm_io_work.why = why; mdev->bm_io_work.flags = flags; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); set_bit(BITMAP_IO, &mdev->flags); if (atomic_read(&mdev->ap_bio_cnt) == 0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /** diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a9ede8fc888..4eaf81a463b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -287,13 +287,13 @@ static int _try_outdate_peer_async(void *data) pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, therefore we have to have the pre state change check here. 
*/ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); ns = mdev->state; if (ns.conn < C_WF_REPORT_PARAMS) { ns.pdsk = nps; _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return 0; } @@ -884,7 +884,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) * wakes potential waiters */ static void drbd_reconfig_done(struct drbd_conf *mdev) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { @@ -892,7 +892,7 @@ static void drbd_reconfig_done(struct drbd_conf *mdev) drbd_thread_stop_nowait(&mdev->tconn->worker); } else clear_bit(CONFIG_PENDING, &mdev->flags); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); wake_up(&mdev->state_wait); } @@ -909,11 +909,11 @@ static void drbd_suspend_al(struct drbd_conf *mdev) return; } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (s) dev_info(DEV, "Suspended AL updates\n"); @@ -1240,7 +1240,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) drbd_suspend_al(mdev); /* IO is still suspended here... 
*/ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); os = mdev->state; ns.i = os.i; /* If MDF_CONSISTENT is not set go into inconsistent state, @@ -1285,7 +1285,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); ns = mdev->state; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) goto force_diskless_dec; @@ -1521,10 +1521,10 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } drbd_flush_workqueue(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto fail; } mdev->tconn->net_conf = new_conf; @@ -1548,7 +1548,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->int_dig_in=int_dig_in; mdev->int_dig_vv=int_dig_vv; retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); reply->ret_code = retcode; @@ -1582,10 +1582,10 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } if (dc.force) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn >= C_WF_CONNECTION) _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto done; } @@ -1917,10 +1917,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); while (retcode == SS_NEED_CONNECTION) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) retcode 
= _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (retcode != SS_NEED_CONNECTION) break; @@ -2193,10 +2193,10 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl drbd_send_uuids_skip_initial_sync(mdev); _drbd_uuid_set(mdev, UI_BITMAP, 0); drbd_print_uuids(mdev, "cleared bitmap UUID"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 27a8363510d..af968a0bae0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -210,9 +210,9 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) LIST_HEAD(reclaimed); struct drbd_epoch_entry *e, *t; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(e, t, &reclaimed, w.list) drbd_free_net_ee(mdev, e); @@ -269,7 +269,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool } /* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. - * Is also used from inside an other spin_lock_irq(&mdev->req_lock); + * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. 
*/ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) @@ -371,9 +371,9 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) int count = 0; int is_net = list == &mdev->net_ee; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_splice_init(list, &work_list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(e, t, &work_list, w.list) { drbd_free_some_ee(mdev, e, is_net); @@ -399,10 +399,10 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) struct drbd_epoch_entry *e, *t; int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); list_splice_init(&mdev->done_ee, &work_list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(e, t, &reclaimed, w.list) drbd_free_net_ee(mdev, e); @@ -429,18 +429,18 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) * and calling prepare_to_wait in the fast path */ while (!list_empty(head)) { prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); io_schedule(); finish_wait(&mdev->ee_wait, &wait); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); } } void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_wait_ee_list_empty(mdev, head); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /* see also kernel_accept; which is only present since 2.6.18. 
@@ -1452,9 +1452,9 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si e->w.cb = e_end_resync_block; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add(&e->w.list, &mdev->sync_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(data_size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) @@ -1462,9 +1462,9 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); drbd_free_ee(mdev, e); fail: @@ -1498,9 +1498,9 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (unlikely(!req)) return false; @@ -1574,11 +1574,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. 
*/ if (mdev->tconn->net_conf->two_primaries) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } else D_ASSERT(drbd_interval_empty(&e->i)); @@ -1595,11 +1595,11 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); dec_unacked(mdev); @@ -1718,7 +1718,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* I'm the receiver, I do hold a net_cnt reference. 
*/ if (!mdev->tconn->net_conf->two_primaries) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); } else { /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ @@ -1765,7 +1765,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num))) goto out_interrupted; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); drbd_insert_interval(&mdev->epoch_entries, &e->i); @@ -1805,7 +1805,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned e->w.cb = e_send_discard_ack; list_add_tail(&e->w.list, &mdev->done_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* we could probably send that P_DISCARD_ACK ourselves, * but I don't like the receiver using the msock */ @@ -1820,13 +1820,13 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (first) { first = 0; dev_alert(DEV, "Concurrent write! 
[W AFTERWARDS] " @@ -1837,13 +1837,13 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned D_ASSERT(have_unacked == 0); } schedule(); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); } finish_wait(&mdev->misc_wait, &wait); } list_add(&e->w.list, &mdev->active_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); switch (mdev->tconn->net_conf->wire_protocol) { case DRBD_PROT_C: @@ -1874,11 +1874,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->i.sector); @@ -2122,18 +2122,18 @@ submit_for_resync: submit: inc_unacked(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add_tail(&e->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* no drbd_rs_complete_io(), we are dropping the connection anyways */ out_free_e: @@ -3183,10 +3183,10 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); retry: os = ns = mdev->state; - 
spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* peer says his disk is uptodate, while we think it is inconsistent, * and this happens while we think we have a sync going on. */ @@ -3270,7 +3270,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.i != os.i) goto retry; clear_bit(CONSIDER_RESYNC, &mdev->flags); @@ -3284,7 +3284,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this for temporal network outages! */ - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); tl_clear(mdev); drbd_uuid_new_current(mdev); @@ -3294,7 +3294,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } rv = _drbd_set_state(mdev, ns, cs_flags, NULL); ns = mdev->state; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) { drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); @@ -3772,11 +3772,11 @@ static void drbd_disconnect(struct drbd_conf *mdev) drbd_free_sock(mdev); /* wait for current activity to cease. */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); _drbd_wait_ee_list_empty(mdev, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* We do not have data structures that would allow us to * get the rs_pending_cnt down to 0 again. 
@@ -3828,7 +3828,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) drbd_try_outdate_peer_async(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); os = mdev->state; if (os.conn >= C_UNCONNECTED) { /* Do not restart in case we are C_DISCONNECTING */ @@ -3836,7 +3836,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) ns.conn = C_UNCONNECTED; rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (os.conn == C_DISCONNECTING) { wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0); @@ -4245,14 +4245,14 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, struct drbd_request *req; struct bio_and_error m; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); req = find_request(mdev, root, id, sector, missing_ok, func); if (unlikely(!req)) { - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return false; } __req_mod(req, what, &m); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (m.bio) complete_master_bio(mdev, &m); @@ -4518,9 +4518,9 @@ int drbd_asender(struct drbd_thread *thi) goto reconnect; /* to avoid race with newly queued ACKs */ set_bit(SIGNAL_ASENDER, &mdev->flags); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); empty = list_empty(&mdev->done_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* new ack may have been queued right here, * but then there is also a signal pending, * and we start over... 
*/ diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c871ef2414f..74179f7986e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -120,7 +120,7 @@ static void queue_barrier(struct drbd_conf *mdev) if (test_bit(CREATE_BARRIER, &mdev->flags)) return; - b = mdev->newest_tle; + b = mdev->tconn->newest_tle; b->w.cb = w_send_barrier; /* inc_ap_pending done here, so we won't * get imbalanced on connection loss. @@ -144,7 +144,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, */ if (mdev->state.conn >= C_CONNECTED && (s & RQ_NET_SENT) != 0 && - req->epoch == mdev->newest_tle->br_number) + req->epoch == mdev->tconn->newest_tle->br_number) queue_barrier(mdev); /* we need to do the conflict detection stuff, @@ -516,10 +516,10 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * just after it grabs the req_lock */ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); - req->epoch = mdev->newest_tle->br_number; + req->epoch = mdev->tconn->newest_tle->br_number; /* increment size of current epoch */ - mdev->newest_tle->n_writes++; + mdev->tconn->newest_tle->n_writes++; /* queue work item to send data */ D_ASSERT(req->rq_state & RQ_NET_PENDING); @@ -528,7 +528,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->tconn->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) + if (mdev->tconn->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) queue_barrier(mdev); break; @@ -693,7 +693,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * this is bad, because if the connection is lost now, * we won't be able to clean them up... 
*/ dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); - list_move(&req->tl_requests, &mdev->out_of_sequence_requests); + list_move(&req->tl_requests, &mdev->tconn->out_of_sequence_requests); } if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; @@ -834,7 +834,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns * spinlock, and grabbing the spinlock. * if we lost that race, we retry. */ if (rw == WRITE && (remote || send_oos) && - mdev->unused_spare_tle == NULL && + mdev->tconn->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { allocate_barrier: b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); @@ -846,7 +846,7 @@ allocate_barrier: } /* GOOD, everything prepared, grab the spin_lock */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of @@ -854,7 +854,7 @@ allocate_barrier: bio. In the next call to drbd_make_request we sleep in inc_ap_bio() */ ret = 1; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto fail_free_complete; } @@ -867,21 +867,21 @@ allocate_barrier: dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); if (!(local || remote)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto fail_free_complete; } } - if (b && mdev->unused_spare_tle == NULL) { - mdev->unused_spare_tle = b; + if (b && mdev->tconn->unused_spare_tle == NULL) { + mdev->tconn->unused_spare_tle = b; b = NULL; } if (rw == WRITE && (remote || send_oos) && - mdev->unused_spare_tle == NULL && + mdev->tconn->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { /* someone closed the current epoch * while we were grabbing the spinlock */ - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto allocate_barrier; } @@ -899,10 +899,10 @@ 
allocate_barrier: * barrier packet. To get the write ordering right, we only have to * make sure that, if this is a write request and it triggered a * barrier packet, this request is queued within the same spinlock. */ - if ((remote || send_oos) && mdev->unused_spare_tle && + if ((remote || send_oos) && mdev->tconn->unused_spare_tle && test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, mdev->unused_spare_tle); - mdev->unused_spare_tle = NULL; + _tl_add_barrier(mdev, mdev->tconn->unused_spare_tle); + mdev->tconn->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && test_bit(CREATE_BARRIER, &mdev->flags))); @@ -934,7 +934,7 @@ allocate_barrier: if (rw == WRITE && _req_conflicts(req)) goto fail_conflicting; - list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); + list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests); /* NOTE remote first: to get the concurrent write detection right, * we must register the request before start of local IO. */ @@ -975,7 +975,7 @@ allocate_barrier: } } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); kfree(b); /* if someone else has beaten us to it... */ if (local) { @@ -1008,7 +1008,7 @@ fail_conflicting: * pretend that it was successfully served right now. */ _drbd_end_io_acct(mdev, req); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (remote) dec_ap_pending(mdev); /* THINK: do we want to fail it (-EIO), or pretend success? 
@@ -1188,10 +1188,10 @@ void request_timer_fn(unsigned long data) if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) return; /* Recurring timer stopped */ - spin_lock_irq(&mdev->req_lock); - le = &mdev->oldest_tle->requests; + spin_lock_irq(&mdev->tconn->req_lock); + le = &mdev->tconn->oldest_tle->requests; if (list_empty(le)) { - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); mod_timer(&mdev->request_timer, jiffies + et); return; } @@ -1210,5 +1210,5 @@ void request_timer_fn(unsigned long data) mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 9d75647cae8..4b0858bf286 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -305,9 +305,9 @@ static inline int req_mod(struct drbd_request *req, struct bio_and_error m; int rv; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); rv = __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (m.bio) complete_master_bio(mdev, &m); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ec26df37884..671251af6bc 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -85,14 +85,14 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); mdev->read_cnt += e->i.size >> 9; list_del(&e->w.list); if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &e->flags)) __drbd_chk_io_error(mdev, false); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); 
drbd_queue_work(&mdev->tconn->data.work, &e->w); put_ldev(mdev); @@ -117,7 +117,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; block_id = e->block_id; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); mdev->writ_cnt += e->i.size >> 9; list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); @@ -134,7 +134,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo if (test_bit(__EE_WAS_ERROR, &e->flags)) __drbd_chk_io_error(mdev, false); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (block_id == ID_SYNCER) drbd_rs_complete_io(mdev, e_sector); @@ -220,9 +220,9 @@ void drbd_endio_pri(struct bio *bio, int error) req->private_bio = ERR_PTR(error); /* not req_mod(), we need irqsave here! */ - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (m.bio) complete_master_bio(mdev, &m); @@ -236,13 +236,13 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * but try to WRITE the P_DATA_REPLY to the failed location, * to give the disk the chance to relocate that block */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { _req_mod(req, READ_RETRY_REMOTE_CANCELED); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return 1; } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return w_send_read_req(mdev, w, 0); } @@ -359,9 +359,9 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) goto defer; e->w.cb = w_e_send_csum; - 
spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add(&e->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) @@ -371,9 +371,9 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) * because bio_add_page failed (probably broken lower level driver), * retry may or may not help. * If it does not, you may need to force disconnect. */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); drbd_free_ee(mdev, e); defer: @@ -793,7 +793,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ping_peer(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); os = mdev->state; verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); @@ -882,7 +882,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); out_unlock: - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); put_ldev(mdev); out: mdev->rs_total = 0; @@ -907,9 +907,9 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent int i = (e->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; atomic_add(i, &mdev->pp_in_use_by_net); atomic_sub(i, &mdev->pp_in_use); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add_tail(&e->w.list, &mdev->net_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); wake_up(&drbd_pp_wait); } else drbd_free_ee(mdev, e); @@ -1210,10 +1210,10 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * actually, this race was harmless, since we only try to send the * barrier packet here, and otherwise do nothing with the object. 
* but compare with the head of w_clear_epoch */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED) cancel = 1; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (cancel) return 1; From a0638456c6ef502506db6ea6cfd0265dfbcf6b51 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:31:32 +0100 Subject: [PATCH 035/609] drbd: moved crypto transformations and friends from mdev to tconn sed -i \ -e 's/mdev->cram_hmac_tfm/mdev->tconn->cram_hmac_tfm/g' \ -e 's/mdev->integrity_w_tfm/mdev->tconn->integrity_w_tfm/g' \ -e 's/mdev->integrity_r_tfm/mdev->tconn->integrity_r_tfm/g' \ -e 's/mdev->int_dig_out/mdev->tconn->int_dig_out/g' \ -e 's/mdev->int_dig_in/mdev->tconn->int_dig_in/g' \ -e 's/mdev->int_dig_vv/mdev->tconn->int_dig_vv/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ++++----- drivers/block/drbd/drbd_main.c | 42 +++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 24 ++++++++--------- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++------------ 4 files changed, 56 insertions(+), 55 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b440ffd1498..af1a2b79508 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -982,6 +982,13 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ + struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + void *int_dig_out; + void *int_dig_in; + void *int_dig_vv; + struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -1114,12 +1121,6 @@ struct drbd_conf { unsigned int al_tr_number; int al_tr_cycle; int 
al_tr_pos; /* position of the next transaction in the journal */ - struct crypto_hash *cram_hmac_tfm; - struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ - struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ - void *int_dig_out; - void *int_dig_in; - void *int_dig_vv; wait_queue_head_t seq_wait; atomic_t packet_seq; unsigned int peer_seq; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c063cd51322..699f63929c1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2404,8 +2404,8 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp, int data_size) { - data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } @@ -2670,8 +2670,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) if (!drbd_get_data_sock(mdev)) return 0; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? - crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); @@ -2701,8 +2701,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? 
MSG_MORE : 0)); if (ok && dgs) { - dgb = mdev->int_dig_out; - drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); + dgb = mdev->tconn->int_dig_out; + drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb); ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) { @@ -2727,8 +2727,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; - drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); - if (memcmp(mdev->int_dig_out, digest, dgs)) { + drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest); + if (memcmp(mdev->tconn->int_dig_out, digest, dgs)) { dev_warn(DEV, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", (unsigned long long)req->i.sector, req->i.size); @@ -2755,8 +2755,8 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, void *dgb; int dgs; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? - crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); @@ -2783,8 +2783,8 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? 
MSG_MORE : 0); if (ok && dgs) { - dgb = mdev->int_dig_out; - drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); + dgb = mdev->tconn->int_dig_out; + drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, e, dgb); ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) @@ -3276,9 +3276,9 @@ static void drbd_delete_device(unsigned int minor) kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ - kfree(mdev->int_dig_out); - kfree(mdev->int_dig_in); - kfree(mdev->int_dig_vv); + kfree(mdev->tconn->int_dig_out); + kfree(mdev->tconn->int_dig_in); + kfree(mdev->tconn->int_dig_vv); /* cleanup the rest that has been * allocated from drbd_new_device @@ -3629,12 +3629,12 @@ void drbd_free_resources(struct drbd_conf *mdev) mdev->csums_tfm = NULL; crypto_free_hash(mdev->verify_tfm); mdev->verify_tfm = NULL; - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = NULL; - crypto_free_hash(mdev->integrity_w_tfm); - mdev->integrity_w_tfm = NULL; - crypto_free_hash(mdev->integrity_r_tfm); - mdev->integrity_r_tfm = NULL; + crypto_free_hash(mdev->tconn->cram_hmac_tfm); + mdev->tconn->cram_hmac_tfm = NULL; + crypto_free_hash(mdev->tconn->integrity_w_tfm); + mdev->tconn->integrity_w_tfm = NULL; + crypto_free_hash(mdev->tconn->integrity_r_tfm); + mdev->tconn->integrity_r_tfm = NULL; drbd_free_sock(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4eaf81a463b..08368087332 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1532,21 +1532,21 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->send_cnt = 0; mdev->recv_cnt = 0; - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = tfm; + crypto_free_hash(mdev->tconn->cram_hmac_tfm); + mdev->tconn->cram_hmac_tfm = tfm; - crypto_free_hash(mdev->integrity_w_tfm); - mdev->integrity_w_tfm = integrity_w_tfm; + crypto_free_hash(mdev->tconn->integrity_w_tfm); + mdev->tconn->integrity_w_tfm = integrity_w_tfm; - 
crypto_free_hash(mdev->integrity_r_tfm); - mdev->integrity_r_tfm = integrity_r_tfm; + crypto_free_hash(mdev->tconn->integrity_r_tfm); + mdev->tconn->integrity_r_tfm = integrity_r_tfm; - kfree(mdev->int_dig_out); - kfree(mdev->int_dig_in); - kfree(mdev->int_dig_vv); - mdev->int_dig_out=int_dig_out; - mdev->int_dig_in=int_dig_in; - mdev->int_dig_vv=int_dig_vv; + kfree(mdev->tconn->int_dig_out); + kfree(mdev->tconn->int_dig_in); + kfree(mdev->tconn->int_dig_vv); + mdev->tconn->int_dig_out=int_dig_out; + mdev->tconn->int_dig_in=int_dig_in; + mdev->tconn->int_dig_vv=int_dig_vv; retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->tconn->req_lock); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index af968a0bae0..4b37010cf46 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -880,7 +880,7 @@ retry: if (h <= 0) return h; - if (mdev->cram_hmac_tfm) { + if (mdev->tconn->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ switch (drbd_do_auth(mdev)) { case -1: @@ -1240,12 +1240,12 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ struct drbd_epoch_entry *e; struct page *page; int dgs, ds, rr; - void *dig_in = mdev->int_dig_in; - void *dig_vv = mdev->int_dig_vv; + void *dig_in = mdev->tconn->int_dig_in; + void *dig_vv = mdev->tconn->int_dig_vv; unsigned long *data; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? 
+ crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { rr = drbd_recv(mdev, dig_in, dgs); @@ -1306,7 +1306,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ } if (dgs) { - drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); + drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, e, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); @@ -1358,11 +1358,11 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, struct bio_vec *bvec; struct bio *bio; int dgs, rr, i, expect; - void *dig_in = mdev->int_dig_in; - void *dig_vv = mdev->int_dig_vv; + void *dig_in = mdev->tconn->int_dig_in; + void *dig_vv = mdev->tconn->int_dig_vv; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { rr = drbd_recv(mdev, dig_in, dgs); @@ -1401,7 +1401,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, } if (dgs) { - drbd_csum_bio(mdev, mdev->integrity_r_tfm, bio, dig_vv); + drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED. 
Broken NICs?\n"); return 0; @@ -3841,8 +3841,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (os.conn == C_DISCONNECTING) { wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0); - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = NULL; + crypto_free_hash(mdev->tconn->cram_hmac_tfm); + mdev->tconn->cram_hmac_tfm = NULL; kfree(mdev->tconn->net_conf); mdev->tconn->net_conf = NULL; @@ -4012,10 +4012,10 @@ static int drbd_do_auth(struct drbd_conf *mdev) unsigned int length; int rv; - desc.tfm = mdev->cram_hmac_tfm; + desc.tfm = mdev->tconn->cram_hmac_tfm; desc.flags = 0; - rv = crypto_hash_setkey(mdev->cram_hmac_tfm, + rv = crypto_hash_setkey(mdev->tconn->cram_hmac_tfm, (u8 *)mdev->tconn->net_conf->shared_secret, key_len); if (rv) { dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); @@ -4062,7 +4062,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm); + resp_size = crypto_hash_digestsize(mdev->tconn->cram_hmac_tfm); response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { dev_err(DEV, "kmalloc of response failed\n"); From 191d3cc8d9291bbfea66f3debf19d6c2f85b4752 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:53:22 +0100 Subject: [PATCH 036/609] drbd: Made drbd_flush_workqueue() to take a tconn instead of an mdev Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index af1a2b79508..526928c368c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1609,7 +1609,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); 
extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void drbd_flush_workqueue(struct drbd_conf *mdev); +extern void drbd_flush_workqueue(struct drbd_tconn *tconn); /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 08368087332..8b8894e10e6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -876,7 +876,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); drbd_thread_start(&mdev->tconn->worker); - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); } /* if still unconfigured, stops worker again. @@ -1076,7 +1076,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* also wait for the last barrier ack. */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); /* and for any other previously queued work */ - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); retcode = rv; /* FIXME: Type mismatch. 
*/ @@ -1520,7 +1520,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); spin_lock_irq(&mdev->tconn->req_lock); if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4b37010cf46..fbf93826ef0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3747,13 +3747,13 @@ static void drbdd(struct drbd_conf *mdev) drbd_md_sync(mdev); } -void drbd_flush_workqueue(struct drbd_conf *mdev) +void drbd_flush_workqueue(struct drbd_tconn *tconn) { struct drbd_wq_barrier barr; barr.w.cb = w_prev_work_done; init_completion(&barr.done); - drbd_queue_work(&mdev->tconn->data.work, &barr.w); + drbd_queue_work(&tconn->data.work, &barr.w); wait_for_completion(&barr.done); } @@ -3803,7 +3803,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, * w_make_resync_request etc. which may still be on the worker queue * to be "canceled" */ - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); /* This also does reclaim_net_ee(). 
If we do this too early, we might * miss some resync ee and pages.*/ From c6d25cfe52a32232e4de0bbe6ddf8219f054f55c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:13:06 +0100 Subject: [PATCH 037/609] drbd: Preparing to use p_header96 for all packets recv_bm_rle_bits() should not make any assumptions about the layout of the packet header Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fbf93826ef0..12fdd737cb6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3393,7 +3393,8 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, static int recv_bm_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct bm_xfer_ctx *c, + unsigned int len) { struct bitstream bs; u64 look_ahead; @@ -3401,7 +3402,6 @@ recv_bm_rle_bits(struct drbd_conf *mdev, u64 tmp; unsigned long s = c->bit_offset; unsigned long e; - int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head)); int toggle = DCBP_get_start(p); int have; int bits; @@ -3458,10 +3458,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev, static int decode_bitmap_c(struct drbd_conf *mdev, struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct bm_xfer_ctx *c, + unsigned int len) { if (DCBP_get_code(p) == RLE_VLI_Bits) - return recv_bm_rle_bits(mdev, p, c); + return recv_bm_rle_bits(mdev, p, c, len); /* other variants had been implemented for evaluation, * but have been dropped as this one turned out to be "best" @@ -3560,7 +3561,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); goto out; } - err = decode_bitmap_c(mdev, p, &c); + err = decode_bitmap_c(mdev, p, &c, data_size); } else {
dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); goto out; From c012949a4084a9f91654121d28f199ef408cb9d7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:58:16 +0100 Subject: [PATCH 038/609] drbd: Replaced all p_header80 with a generic p_header Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 64 +++++++++++++++--------------- drivers/block/drbd/drbd_main.c | 54 ++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 16 ++++---- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 64 insertions(+), 72 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 526928c368c..dc669dfe5b0 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -338,7 +338,6 @@ struct p_header80 { u32 magic; u16 command; u16 length; /* bytes of data after this header */ - u8 payload[0]; } __packed; /* Header for big packets, Used for data packets exceeding 64kB */ @@ -349,9 +348,12 @@ struct p_header95 { u8 payload[0]; } __packed; -union p_header { - struct p_header80 h80; - struct p_header95 h95; +struct p_header { + union { + struct p_header80 h80; + struct p_header95 h95; + }; + u8 payload[0]; }; /* @@ -380,7 +382,7 @@ union p_header { #define DP_DISCARD 64 /* equals REQ_DISCARD */ struct p_data { - union p_header head; + struct p_header head; u64 sector; /* 64 bits sector number */ u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; @@ -396,7 +398,7 @@ struct p_data { * P_DATA_REQUEST, P_RS_DATA_REQUEST */ struct p_block_ack { - struct p_header80 head; + struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -405,7 +407,7 @@ struct p_block_ack { struct p_block_req { - struct p_header80 head; + struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -422,7 +424,7 @@ struct p_block_req { */ struct p_handshake { - struct p_header80 head; /* 8 bytes */ + struct 
p_header head; /* Note: You must always use a h80 here */ u32 protocol_min; u32 feature_flags; u32 protocol_max; @@ -437,19 +439,19 @@ struct p_handshake { /* 80 bytes, FIXED for the next century */ struct p_barrier { - struct p_header80 head; + struct p_header head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ } __packed; struct p_barrier_ack { - struct p_header80 head; + struct p_header head; u32 barrier; u32 set_size; } __packed; struct p_rs_param { - struct p_header80 head; + struct p_header head; u32 rate; /* Since protocol version 88 and higher. */ @@ -457,7 +459,7 @@ struct p_rs_param { } __packed; struct p_rs_param_89 { - struct p_header80 head; + struct p_header head; u32 rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; @@ -465,7 +467,7 @@ struct p_rs_param_89 { } __packed; struct p_rs_param_95 { - struct p_header80 head; + struct p_header head; u32 rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; @@ -481,7 +483,7 @@ enum drbd_conn_flags { }; struct p_protocol { - struct p_header80 head; + struct p_header head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; @@ -495,17 +497,17 @@ struct p_protocol { } __packed; struct p_uuids { - struct p_header80 head; + struct p_header head; u64 uuid[UI_EXTENDED_SIZE]; } __packed; struct p_rs_uuid { - struct p_header80 head; + struct p_header head; u64 uuid; } __packed; struct p_sizes { - struct p_header80 head; + struct p_header head; u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ @@ -515,18 +517,18 @@ struct p_sizes { } __packed; struct p_state { - struct p_header80 head; + struct p_header head; u32 state; } __packed; struct p_req_state { - struct p_header80 head; + struct p_header head; u32 mask; u32 val; } __packed; struct p_req_state_reply { - struct p_header80 head; + struct p_header head; u32 retcode; } __packed; @@ -541,14 +543,14 @@ struct p_drbd06_param { } 
__packed; struct p_discard { - struct p_header80 head; + struct p_header head; u64 block_id; u32 seq_num; u32 pad; } __packed; struct p_block_desc { - struct p_header80 head; + struct p_header head; u64 sector; u32 blksize; u32 pad; /* to multiple of 8 Byte */ @@ -564,7 +566,7 @@ enum drbd_bitmap_code { }; struct p_compressed_bm { - struct p_header80 head; + struct p_header head; /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code * (encoding & 0x80): polarity (set/unset) of first runlength * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits @@ -576,7 +578,7 @@ struct p_compressed_bm { } __packed; struct p_delay_probe93 { - struct p_header80 head; + struct p_header head; u32 seq_num; /* sequence number to match the two probe packets */ u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; @@ -625,7 +627,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) * so we need to use the fixed size 4KiB page size * most architectures have used for a long time. 
*/ -#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80)) +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) #if (PAGE_SIZE < 4096) @@ -634,7 +636,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) #endif union p_polymorph { - union p_header header; + struct p_header header; struct p_handshake handshake; struct p_data data; struct p_block_ack block_ack; @@ -1245,12 +1247,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header80 *h, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header80 *h, + enum drbd_packets cmd, struct p_header *h, size_t size); extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size); @@ -2019,19 +2021,19 @@ static inline void request_ping(struct drbd_conf *mdev) static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_header80 h; + struct p_header h; return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_conf *mdev) { - struct p_header80 h; + struct p_header h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); } static inline int drbd_send_ping_ack(struct drbd_conf *mdev) { - struct p_header80 h; + struct p_header h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 699f63929c1..55ce48e24b8 
100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1822,9 +1822,10 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header80 *h, + enum drbd_packets cmd, struct p_header *hg, size_t size, unsigned msg_flags) { + struct p_header80 *h = (struct p_header80 *)hg; int sent, ok; if (!expect(h)) @@ -1849,7 +1850,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, * when we hold the appropriate socket mutex. */ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header80 *h, size_t size) + enum drbd_packets cmd, struct p_header *h, size_t size) { int ok = 0; struct socket *sock; @@ -1983,8 +1984,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (mdev->tconn->agreed_pro_version >= 87) strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); - rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, - (struct p_header80 *)p, size); + rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, &p->head, size); kfree(p); return rv; } @@ -2009,8 +2009,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, &p.head, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -2054,8 +2053,7 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) drbd_md_sync(mdev); p.uuid = cpu_to_be64(uuid); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, &p.head, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) @@ -2087,8 +2085,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum 
dds_flags fl p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, &p.head, sizeof(p)); return ok; } @@ -2112,8 +2109,7 @@ int drbd_send_state(struct drbd_conf *mdev) sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { - ok = _drbd_send_cmd(mdev, sock, P_STATE, - (struct p_header80 *)&p, sizeof(p), 0); + ok = _drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); } mutex_unlock(&mdev->tconn->data.mutex); @@ -2130,8 +2126,7 @@ int drbd_send_state_req(struct drbd_conf *mdev, p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, &p.head, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) @@ -2140,8 +2135,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) p.retcode = cpu_to_be32(retcode); - return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, &p.head, sizeof(p)); } int fill_bitmap_rle_bits(struct drbd_conf *mdev, @@ -2246,7 +2240,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, */ static int send_bitmap_rle_or_plain(struct drbd_conf *mdev, - struct p_header80 *h, struct bm_xfer_ctx *c) + struct p_header *h, struct bm_xfer_ctx *c) { struct p_compressed_bm *p = (void*)h; unsigned long num_words; @@ -2300,7 +2294,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; - struct p_header80 *p; + struct p_header *p; int err; if (!expect(mdev->bitmap)) @@ -2308,7 +2302,7 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) /* maybe we should use some per thread scratch page, * and 
allocate that during initial device creation? */ - p = (struct p_header80 *) __get_free_page(GFP_NOIO); + p = (struct p_header *) __get_free_page(GFP_NOIO); if (!p) { dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); return false; @@ -2365,8 +2359,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) if (mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, &p.head, sizeof(p)); return ok; } @@ -2393,8 +2386,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, &p.head, sizeof(p)); return ok; } @@ -2452,8 +2444,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, p.block_id = block_id; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); return ok; } @@ -2469,9 +2460,9 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.magic = cpu_to_be32(DRBD_MAGIC); - p.head.command = cpu_to_be16(cmd); - p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); + p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); + p.head.h80.command = cpu_to_be16(cmd); + p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); mutex_lock(&mdev->tconn->data.mutex); @@ -2492,8 +2483,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, - (struct p_header80 
*)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, &p.head, sizeof(p)); return ok; } @@ -2677,12 +2667,12 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(P_DATA); p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); + cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); } else { p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(P_DATA); p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); + cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); } p.sector = cpu_to_be64(req->i.sector); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 12fdd737cb6..9393fe482ef 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -700,7 +700,7 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header80 *h = &mdev->tconn->data.sbuf.header.h80; + struct p_header *h = &mdev->tconn->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } @@ -925,7 +925,7 @@ out_release_sockets: static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) { - union p_header *h = &mdev->tconn->data.rbuf.header; + struct p_header *h = &mdev->tconn->data.rbuf.header; int r; r = drbd_recv(mdev, h, sizeof(*h)); @@ -3477,7 +3477,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned plain = sizeof(struct p_header80) * + unsigned plain = sizeof(struct p_header) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; @@ -3699,7 +3699,7 @@ static struct data_cmd 
drbd_cmd_handler[] = { static void drbdd(struct drbd_conf *mdev) { - union p_header *header = &mdev->tconn->data.rbuf.header; + struct p_header *header = &mdev->tconn->data.rbuf.header; unsigned int packet_size; enum drbd_packets cmd; size_t shs; /* sub header size */ @@ -3715,14 +3715,14 @@ static void drbdd(struct drbd_conf *mdev) goto err_out; } - shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header); + shs = drbd_cmd_handler[cmd].pkt_size - sizeof(struct p_header); if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); goto err_out; } if (shs) { - rv = drbd_recv(mdev, &header->h80.payload, shs); + rv = drbd_recv(mdev, &header->payload, shs); if (unlikely(rv != shs)) { if (!signal_pending(current)) dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); @@ -3909,8 +3909,8 @@ static int drbd_send_handshake(struct drbd_conf *mdev) memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd( mdev, mdev->tconn->data.socket, P_HAND_SHAKE, - (struct p_header80 *)p, sizeof(*p), 0 ); + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_HAND_SHAKE, + &p->head, sizeof(*p), 0 ); mutex_unlock(&mdev->tconn->data.mutex); return ok; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 671251af6bc..afad8ea4d88 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1224,7 +1224,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. 
*/ ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, - (struct p_header80 *)p, sizeof(*p), 0); + &p->head, sizeof(*p), 0); drbd_put_data_sock(mdev); return ok; From fd340c12c98b57ec0751ebb317057eee41be0c3d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:57:39 +0100 Subject: [PATCH 039/609] drbd: Use new header layout The new header layout will only be used if the peer supports it of course. For the first packet and the handshake packet the old (h80) layout is used for compatibility reasons. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 82 +++++++++++++----------------- drivers/block/drbd/drbd_receiver.c | 7 ++- include/linux/drbd.h | 2 +- 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dc669dfe5b0..4de43481bcb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -345,7 +345,6 @@ struct p_header95 { u16 magic; /* use DRBD_MAGIC_BIG here */ u16 command; u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. 
*/ - u8 payload[0]; } __packed; struct p_header { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 55ce48e24b8..f8cb15c84ed 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1820,12 +1820,36 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) } #endif +static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be32(DRBD_MAGIC); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size); +} + +static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be16(DRBD_MAGIC_BIG); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); +} + +static void prepare_header(struct drbd_conf *mdev, struct p_header *h, + enum drbd_packets cmd, int size) +{ + if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + prepare_header95(mdev, &h->h95, cmd, size); + else + prepare_header80(mdev, &h->h80, cmd, size); +} + /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *hg, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags) { - struct p_header80 *h = (struct p_header80 *)hg; int sent, ok; if (!expect(h)) @@ -1833,9 +1857,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, if (!expect(size)) return false; - h->magic = cpu_to_be32(DRBD_MAGIC); - h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header80)); + prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1878,12 +1900,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct p_header80 h; + struct p_header h; int ok; - 
h.magic = cpu_to_be32(DRBD_MAGIC); - h.command = cpu_to_be16(cmd); - h.length = cpu_to_be16(size); + prepare_header(mdev, &h, cmd, size); if (!drbd_get_data_sock(mdev)) return 0; @@ -2456,14 +2476,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, int ok; struct p_block_req p; + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size); p.sector = cpu_to_be64(sector); p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); - mutex_lock(&mdev->tconn->data.mutex); ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); @@ -2663,22 +2680,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(P_DATA); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(P_DATA); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } - + prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = - atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); @@ -2748,18 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->tconn->agreed_pro_version >= 87 && 
mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(cmd); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } - + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; /* p.seq_num = 0; No sequence numbers here.. */ @@ -3028,7 +3022,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); - mdev->tconn->agreed_pro_version = PRO_VERSION_MAX; + /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; @@ -3506,12 +3500,8 @@ int __init drbd_init(void) { int err; - if (sizeof(struct p_handshake) != 80) { - printk(KERN_ERR - "drbd: never change the size or layout " - "of the HandShake packet.\n"); - return -EINVAL; - } + BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); + BUILD_BUG_ON(sizeof(struct p_handshake) != 80); if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9393fe482ef..8f5a241fe20 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -761,6 +761,9 @@ static int drbd_connect(struct drbd_conf *mdev) return -2; clear_bit(DISCARD_CONCURRENT, &mdev->flags); + mdev->tconn->agreed_pro_version = 99; + /* agreed_pro_version must 
be smaller than 100 so we send the old + header (h80) in the first packet and in the handshake packet. */ sock = NULL; msock = NULL; @@ -935,12 +938,12 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi return false; } - if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) { + if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { *cmd = be16_to_cpu(h->h95.command); - *packet_size = be32_to_cpu(h->h95.length); + *packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff; } else { dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d2820281167..35fc08a0a55 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 100 enum drbd_io_error_p { From 257d0af689df9aaf6ebecfc8d66b15415006c257 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 26 Jan 2011 12:15:29 +0100 Subject: [PATCH 040/609] drbd: Implemented receiving of new style packets on meta socket Now drbd communication with protocol 100 actually works. Replaced the remaining p_header80 with p_header where we no longer know which header it is. In the places where p_header80 is still in use, it is on purpose, because we know that it is an old style header there. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 133 +++++++++++++++-------------- 1 file changed, 68 insertions(+), 65 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8f5a241fe20..c0435c4f5d8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -926,18 +926,9 @@ out_release_sockets: return -1; } -static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) +static bool decode_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_packets *cmd, + unsigned int *packet_size) { - struct p_header *h = &mdev->tconn->data.rbuf.header; - int r; - - r = drbd_recv(mdev, h, sizeof(*h)); - if (unlikely(r != sizeof(*h))) { - if (!signal_pending(current)) - dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); - return false; - } - if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); @@ -951,9 +942,25 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi be16_to_cpu(h->h80.length)); return false; } + return true; +} + +static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) +{ + struct p_header *h = &mdev->tconn->data.rbuf.header; + int r; + + r = drbd_recv(mdev, h, sizeof(*h)); + if (unlikely(r != sizeof(*h))) { + if (!signal_pending(current)) + dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); + return false; + } + + r = decode_header(mdev, h, cmd, packet_size); mdev->tconn->last_received = jiffies; - return true; + return r; } static void drbd_flush(struct drbd_conf *mdev) @@ -2807,14 +2814,14 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi } if (apv <= 88) { - header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80); + header_size = sizeof(struct p_rs_param) - 
sizeof(struct p_header); data_size = packet_size - header_size; } else if (apv <= 94) { - header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80); + header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header); data_size = packet_size - header_size; D_ASSERT(data_size == 0); } else { - header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80); + header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header); data_size = packet_size - header_size; D_ASSERT(data_size == 0); } @@ -3524,7 +3531,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne void *buffer; int err; int ok = false; - struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; + struct p_header *h = &mdev->tconn->data.rbuf.header; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information @@ -3571,7 +3578,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne } c.packets[cmd == P_BITMAP]++; - c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; + c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size; if (err <= 0) { if (err < 0) @@ -3670,13 +3677,13 @@ static struct data_cmd drbd_cmd_handler[] = { [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , - [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , - [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , - [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote }, + [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , + [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , + [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote }, [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 
[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, - [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam }, - [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam }, + [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam }, + [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam }, [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, @@ -4184,9 +4191,9 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) +static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_req_state_reply *p = (struct p_req_state_reply *)h; + struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; int retcode = be32_to_cpu(p->retcode); @@ -4202,13 +4209,13 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) +static int got_Ping(struct drbd_conf *mdev, enum drbd_packets cmd) { return drbd_send_ping_ack(mdev); } -static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_PingAck(struct drbd_conf *mdev, enum drbd_packets cmd) { /* restore idle timeout */ mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; @@ -4218,9 +4225,9 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) +static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); @@ -4263,9 +4270,9 @@ 
validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, return true; } -static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); enum drbd_req_event what; @@ -4277,7 +4284,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) dec_rs_pending(mdev); return true; } - switch (be16_to_cpu(h->command)) { + switch (cmd) { case P_RS_WRITE_ACK: D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER_AND_SIS; @@ -4304,9 +4311,9 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) what, false); } -static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_NegAck(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || @@ -4337,9 +4344,9 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) +static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4351,11 +4358,11 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) NEG_ACKED, false); } -static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) +static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packets cmd) { 
sector_t sector; int size; - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4366,7 +4373,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) if (get_ldev_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, sector); - switch (be16_to_cpu(h->command)) { + switch (cmd) { case P_NEG_RS_DREPLY: drbd_rs_failed_io(mdev, sector, size); case P_RS_CANCEL: @@ -4382,9 +4389,9 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_barrier_ack *p = (struct p_barrier_ack *)h; + struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack; tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); @@ -4398,9 +4405,9 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) +static int got_OVResult(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; struct drbd_work *w; sector_t sector; int size; @@ -4442,14 +4449,14 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) +static int got_skip(struct drbd_conf *mdev, enum drbd_packets cmd) { return true; } struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, struct p_header80 *h); + int (*process)(struct drbd_conf *mdev, enum drbd_packets cmd); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4458,8 +4465,8 @@ static struct asender_cmd *get_asender_cmd(int cmd) /* anything missing from this table is in * the drbd_cmd_handler 
(drbd_default_handler) table, * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header80), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck }, + [P_PING] = { sizeof(struct p_header), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, @@ -4483,15 +4490,16 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header80 *h = &mdev->tconn->meta.rbuf.header.h80; + struct p_header *h = &mdev->tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; - int rv, len; + int rv; void *buf = h; int received = 0; - int expect = sizeof(struct p_header80); - int empty; + int expect = sizeof(struct p_header); int ping_timeout_active = 0; + int empty, pkt_size; + enum drbd_packets cmd_nr; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4581,30 +4589,25 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (unlikely(h->magic != cpu_to_be32(DRBD_MAGIC))) { - dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n", - be32_to_cpu(h->magic), - be16_to_cpu(h->command), - be16_to_cpu(h->length)); + if (!decode_header(mdev, h, &cmd_nr, &pkt_size)) goto reconnect; - } - cmd = get_asender_cmd(be16_to_cpu(h->command)); - len = be16_to_cpu(h->length); + cmd = get_asender_cmd(cmd_nr); if (unlikely(cmd == NULL)) { - dev_err(DEV, "unknown command?? 
on meta m: 0x%08x c: %d l: %d\n", - be32_to_cpu(h->magic), - be16_to_cpu(h->command), - be16_to_cpu(h->length)); + dev_err(DEV, "unknown command %d on meta (l: %d)\n", + cmd_nr, pkt_size); goto disconnect; } expect = cmd->pkt_size; - if (!expect(len == expect - sizeof(struct p_header80))) + if (pkt_size != expect - sizeof(struct p_header)) { + dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n", + cmd_nr, pkt_size); goto reconnect; + } } if (received == expect) { mdev->tconn->last_received = jiffies; D_ASSERT(cmd != NULL); - if (!cmd->process(mdev, h)) + if (!cmd->process(mdev, cmd_nr)) goto reconnect; /* the idle_timeout (ping-int) @@ -4614,7 +4617,7 @@ int drbd_asender(struct drbd_thread *thi) buf = h; received = 0; - expect = sizeof(struct p_header80); + expect = sizeof(struct p_header); cmd = NULL; } } From b42a70ad32539019c15457fce172194b0f8353d5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 Jan 2011 10:55:20 +0100 Subject: [PATCH 041/609] drbd: Do not access tconn after it was freed Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f8cb15c84ed..8349d42fa13 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3260,10 +3260,6 @@ static void drbd_delete_device(unsigned int minor) kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ - kfree(mdev->tconn->int_dig_out); - kfree(mdev->tconn->int_dig_in); - kfree(mdev->tconn->int_dig_vv); - /* cleanup the rest that has been * allocated from drbd_new_device * and actually free the mdev itself */ @@ -3377,6 +3373,9 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_unlock_irq(&global_state_lock); kfree(tconn->name); + kfree(tconn->int_dig_out); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); kfree(tconn); } From f2ad90637978e9cff3bdd32d414c9e851e47868c Mon Sep 17 00:00:00 
2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 17:13:25 +0100 Subject: [PATCH 042/609] drbd: Move cmdname() out of drbd_int.h There is no good reason for cmdname() to be an inline function. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 59 +--------------------------------- drivers/block/drbd/drbd_main.c | 59 ++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4de43481bcb..e8a1fa55695 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -221,64 +221,7 @@ enum drbd_packets { P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ }; -static inline const char *cmdname(enum drbd_packets cmd) -{ - /* THINK may need to become several global tables - * when we want to support more than - * one PRO_VERSION */ - static const char *cmdnames[] = { - [P_DATA] = "Data", - [P_DATA_REPLY] = "DataReply", - [P_RS_DATA_REPLY] = "RSDataReply", - [P_BARRIER] = "Barrier", - [P_BITMAP] = "ReportBitMap", - [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", - [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", - [P_UNPLUG_REMOTE] = "UnplugRemote", - [P_DATA_REQUEST] = "DataRequest", - [P_RS_DATA_REQUEST] = "RSDataRequest", - [P_SYNC_PARAM] = "SyncParam", - [P_SYNC_PARAM89] = "SyncParam89", - [P_PROTOCOL] = "ReportProtocol", - [P_UUIDS] = "ReportUUIDs", - [P_SIZES] = "ReportSizes", - [P_STATE] = "ReportState", - [P_SYNC_UUID] = "ReportSyncUUID", - [P_AUTH_CHALLENGE] = "AuthChallenge", - [P_AUTH_RESPONSE] = "AuthResponse", - [P_PING] = "Ping", - [P_PING_ACK] = "PingAck", - [P_RECV_ACK] = "RecvAck", - [P_WRITE_ACK] = "WriteAck", - [P_RS_WRITE_ACK] = "RSWriteAck", - [P_DISCARD_ACK] = "DiscardAck", - [P_NEG_ACK] = "NegAck", - [P_NEG_DREPLY] = "NegDReply", - [P_NEG_RS_DREPLY] = "NegRSDReply", - [P_BARRIER_ACK] = "BarrierAck", - [P_STATE_CHG_REQ] = "StateChgRequest", - [P_STATE_CHG_REPLY] = "StateChgReply", - 
[P_OV_REQUEST] = "OVRequest", - [P_OV_REPLY] = "OVReply", - [P_OV_RESULT] = "OVResult", - [P_CSUM_RS_REQUEST] = "CsumRSRequest", - [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", - [P_COMPRESSED_BITMAP] = "CBitmap", - [P_DELAY_PROBE] = "DelayProbe", - [P_OUT_OF_SYNC] = "OutOfSync", - [P_MAX_CMD] = NULL, - }; - - if (cmd == P_HAND_SHAKE_M) - return "HandShakeM"; - if (cmd == P_HAND_SHAKE_S) - return "HandShakeS"; - if (cmd == P_HAND_SHAKE) - return "HandShake"; - if (cmd >= P_MAX_CMD) - return "Unknown"; - return cmdnames[cmd]; -} +extern const char *cmdname(enum drbd_packets cmd); /* for sending/receiving the bitmap, * possibly in some encoding scheme */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8349d42fa13..6090276ad9f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -4101,6 +4101,65 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) return 1; } +const char *cmdname(enum drbd_packets cmd) +{ + /* THINK may need to become several global tables + * when we want to support more than + * one PRO_VERSION */ + static const char *cmdnames[] = { + [P_DATA] = "Data", + [P_DATA_REPLY] = "DataReply", + [P_RS_DATA_REPLY] = "RSDataReply", + [P_BARRIER] = "Barrier", + [P_BITMAP] = "ReportBitMap", + [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", + [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", + [P_UNPLUG_REMOTE] = "UnplugRemote", + [P_DATA_REQUEST] = "DataRequest", + [P_RS_DATA_REQUEST] = "RSDataRequest", + [P_SYNC_PARAM] = "SyncParam", + [P_SYNC_PARAM89] = "SyncParam89", + [P_PROTOCOL] = "ReportProtocol", + [P_UUIDS] = "ReportUUIDs", + [P_SIZES] = "ReportSizes", + [P_STATE] = "ReportState", + [P_SYNC_UUID] = "ReportSyncUUID", + [P_AUTH_CHALLENGE] = "AuthChallenge", + [P_AUTH_RESPONSE] = "AuthResponse", + [P_PING] = "Ping", + [P_PING_ACK] = "PingAck", + [P_RECV_ACK] = "RecvAck", + [P_WRITE_ACK] = "WriteAck", + [P_RS_WRITE_ACK] = "RSWriteAck", + [P_DISCARD_ACK] = "DiscardAck", + 
[P_NEG_ACK] = "NegAck", + [P_NEG_DREPLY] = "NegDReply", + [P_NEG_RS_DREPLY] = "NegRSDReply", + [P_BARRIER_ACK] = "BarrierAck", + [P_STATE_CHG_REQ] = "StateChgRequest", + [P_STATE_CHG_REPLY] = "StateChgReply", + [P_OV_REQUEST] = "OVRequest", + [P_OV_REPLY] = "OVReply", + [P_OV_RESULT] = "OVResult", + [P_CSUM_RS_REQUEST] = "CsumRSRequest", + [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", + [P_COMPRESSED_BITMAP] = "CBitmap", + [P_DELAY_PROBE] = "DelayProbe", + [P_OUT_OF_SYNC] = "OutOfSync", + [P_MAX_CMD] = NULL, + }; + + if (cmd == P_HAND_SHAKE_M) + return "HandShakeM"; + if (cmd == P_HAND_SHAKE_S) + return "HandShakeS"; + if (cmd == P_HAND_SHAKE) + return "HandShake"; + if (cmd >= P_MAX_CMD) + return "Unknown"; + return cmdnames[cmd]; +} + #ifdef CONFIG_DRBD_FAULT_INJECTION /* Fault insertion support including random number generator shamelessly * stolen from kernel/rcutorture.c */ From d87630230616ba2c13141184258906d34c727b4b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 17:39:41 +0100 Subject: [PATCH 043/609] drbd: Rename "enum drbd_packets" to "enum drbd_packet" Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 40 ++++++------ drivers/block/drbd/drbd_main.c | 43 ++++++------- drivers/block/drbd/drbd_receiver.c | 97 ++++++++++++++++++------------ 3 files changed, 97 insertions(+), 83 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e8a1fa55695..9f5c13513d6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -159,7 +159,7 @@ extern struct drbd_conf **minor_table; extern struct ratelimit_state drbd_ratelimit_state; /* on the wire */ -enum drbd_packets { +enum drbd_packet { /* receiver (data socket) */ P_DATA = 0x00, P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ @@ -221,7 +221,7 @@ enum drbd_packets { P_HAND_SHAKE = 0xfffe /* FIXED for the next century! 
*/ }; -extern const char *cmdname(enum drbd_packets cmd); +extern const char *cmdname(enum drbd_packet cmd); /* for sending/receiving the bitmap, * possibly in some encoding scheme */ @@ -1189,36 +1189,34 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, - size_t size, unsigned msg_flags); + enum drbd_packet cmd, struct p_header *h, + size_t size, unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, - size_t size); -extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, - char *data, size_t size); + enum drbd_packet cmd, struct p_header *h, size_t size); +extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, + char *data, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); -extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, - struct drbd_epoch_entry *e); -extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_block_req *rp); -extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_data *dp, int data_size); -extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, +extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + struct drbd_epoch_entry *e); +extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_block_req *rp); +extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_data *dp, int data_size); +extern int drbd_send_ack_ex(struct drbd_conf *mdev, 
enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id); extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); -extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, +extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_epoch_entry *e); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id); -extern int drbd_send_drequest_csum(struct drbd_conf *mdev, - sector_t sector,int size, - void *digest, int digest_size, - enum drbd_packets cmd); +extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, + int size, void *digest, int digest_size, + enum drbd_packet cmd); extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); extern int drbd_send_bitmap(struct drbd_conf *mdev); @@ -1961,7 +1959,7 @@ static inline void request_ping(struct drbd_conf *mdev) } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, - enum drbd_packets cmd) + enum drbd_packet cmd) { struct p_header h; return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6090276ad9f..81bd1f3b135 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1821,7 +1821,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) #endif static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, - enum drbd_packets cmd, int size) + enum drbd_packet cmd, int size) { h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); @@ -1829,7 +1829,7 @@ static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, } static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, - enum drbd_packets cmd, int size) + enum drbd_packet cmd, int size) { h->magic = cpu_to_be16(DRBD_MAGIC_BIG); h->command = 
cpu_to_be16(cmd); @@ -1837,7 +1837,7 @@ static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, } static void prepare_header(struct drbd_conf *mdev, struct p_header *h, - enum drbd_packets cmd, int size) + enum drbd_packet cmd, int size) { if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) prepare_header95(mdev, &h->h95, cmd, size); @@ -1847,8 +1847,8 @@ static void prepare_header(struct drbd_conf *mdev, struct p_header *h, /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, - size_t size, unsigned msg_flags) + enum drbd_packet cmd, struct p_header *h, size_t size, + unsigned msg_flags) { int sent, ok; @@ -1872,7 +1872,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, * when we hold the appropriate socket mutex. */ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, size_t size) + enum drbd_packet cmd, struct p_header *h, size_t size) { int ok = 0; struct socket *sock; @@ -1897,7 +1897,7 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, return ok; } -int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, +int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, char *data, size_t size) { struct p_header h; @@ -1938,7 +1938,8 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { - enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; + enum drbd_packet cmd = + apv >= 89 ? 
P_SYNC_PARAM89 : P_SYNC_PARAM; p = &mdev->tconn->data.sbuf.rs_param_95; @@ -2391,10 +2392,8 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) * @blksize: size in byte, needs to be in big endian byte order * @block_id: Id, big endian byte order */ -static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, - u64 sector, - u32 blksize, - u64 block_id) +static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + u64 sector, u32 blksize, u64 block_id) { int ok; struct p_block_ack p; @@ -2413,7 +2412,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, /* dp->sector and dp->block_id already/still in network byte order, * data_size is payload size according to dp->head, * and may need to be corrected for digest size. */ -int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_data *dp, int data_size) { data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? @@ -2422,7 +2421,7 @@ int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, dp->block_id); } -int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_block_req *rp) { return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); @@ -2434,8 +2433,8 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, * @cmd: Packet command code. * @e: Epoch entry. */ -int drbd_send_ack(struct drbd_conf *mdev, - enum drbd_packets cmd, struct drbd_epoch_entry *e) +int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + struct drbd_epoch_entry *e) { return _drbd_send_ack(mdev, cmd, cpu_to_be64(e->i.sector), @@ -2445,7 +2444,7 @@ int drbd_send_ack(struct drbd_conf *mdev, /* This function misuses the block_id field to signal if the blocks * are is sync or not. 
*/ -int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id) { return _drbd_send_ack(mdev, cmd, @@ -2468,10 +2467,8 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, return ok; } -int drbd_send_drequest_csum(struct drbd_conf *mdev, - sector_t sector, int size, - void *digest, int digest_size, - enum drbd_packets cmd) +int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, + void *digest, int digest_size, enum drbd_packet cmd) { int ok; struct p_block_req p; @@ -2742,7 +2739,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ -int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_epoch_entry *e) { int ok; @@ -4101,7 +4098,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) return 1; } -const char *cmdname(enum drbd_packets cmd) +const char *cmdname(enum drbd_packet cmd) { /* THINK may need to become several global tables * when we want to support more than diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c0435c4f5d8..31f6875ceba 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -697,15 +697,16 @@ out: return s_estab; } -static int drbd_send_fp(struct drbd_conf *mdev, - struct socket *sock, enum drbd_packets cmd) +static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, + enum drbd_packet cmd) { struct p_header *h = &mdev->tconn->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } -static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, + struct socket *sock) { 
struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; @@ -926,8 +927,8 @@ out_release_sockets: return -1; } -static bool decode_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_packets *cmd, - unsigned int *packet_size) +static bool decode_header(struct drbd_conf *mdev, struct p_header *h, + enum drbd_packet *cmd, unsigned int *packet_size) { if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); @@ -945,7 +946,8 @@ static bool decode_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_ return true; } -static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) +static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, + unsigned int *packet_size) { struct p_header *h = &mdev->tconn->data.rbuf.header; int r; @@ -1170,7 +1172,8 @@ fail: return err; } -static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { int rv; struct p_barrier *p = &mdev->tconn->data.rbuf.barrier; @@ -1499,7 +1502,8 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, return NULL; } -static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct drbd_request *req; sector_t sector; @@ -1528,7 +1532,8 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi return ok; } -static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { sector_t sector; int ok; @@ -1681,7 +1686,8 @@ static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) } /* mirrored write */ -static int receive_Data(struct 
drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { sector_t sector; struct drbd_epoch_entry *e; @@ -1966,7 +1972,8 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) } -static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size) +static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int digest_size) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -2691,7 +2698,8 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_protocol *p = &mdev->tconn->data.rbuf.protocol; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; @@ -2790,7 +2798,8 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) +static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int packet_size) { int ok = true; struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95; @@ -2954,7 +2963,8 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_sizes *p = &mdev->tconn->data.rbuf.sizes; enum determine_dev_size dd = unchanged; @@ -3057,7 +3067,8 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned return true; } -static int 
receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_uuids *p = &mdev->tconn->data.rbuf.uuids; u64 *p_uuid; @@ -3151,7 +3162,8 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_req_state *p = &mdev->tconn->data.rbuf.req_state; union drbd_state mask, val; @@ -3177,7 +3189,8 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi return true; } -static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_state *p = &mdev->tconn->data.rbuf.state; union drbd_state os, ns, peer_state; @@ -3329,7 +3342,8 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned return true; } -static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid; @@ -3525,7 +3539,8 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. 
*/ -static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct bm_xfer_ctx c; void *buffer; @@ -3616,7 +3631,8 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne return ok; } -static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { /* TODO zero copy sink :) */ static char sink[128]; @@ -3636,7 +3652,8 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned return size == 0; } -static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ @@ -3645,7 +3662,8 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u return true; } -static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc; @@ -3664,7 +3682,8 @@ static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, un return true; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd, + unsigned int to_receive); struct data_cmd { int expect_payload; @@ -3711,7 +3730,7 @@ static void drbdd(struct drbd_conf *mdev) { struct p_header *header = &mdev->tconn->data.rbuf.header; unsigned int packet_size; - enum drbd_packets cmd; + enum drbd_packet cmd; size_t shs; /* 
sub header size */ int rv; @@ -3938,7 +3957,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); unsigned int length; - enum drbd_packets cmd; + enum drbd_packet cmd; int rv; rv = drbd_send_handshake(mdev); @@ -4019,7 +4038,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; - enum drbd_packets cmd; + enum drbd_packet cmd; unsigned int length; int rv; @@ -4191,7 +4210,7 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; @@ -4209,13 +4228,13 @@ static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_Ping(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd) { return drbd_send_ping_ack(mdev); } -static int got_PingAck(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd) { /* restore idle timeout */ mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; @@ -4225,7 +4244,7 @@ static int got_PingAck(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4270,7 +4289,7 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, return true; } -static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packets 
cmd) +static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4311,7 +4330,7 @@ static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packets cmd) what, false); } -static int got_NegAck(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4344,7 +4363,7 @@ static int got_NegAck(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4358,7 +4377,7 @@ static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packets cmd) NEG_ACKED, false); } -static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd) { sector_t sector; int size; @@ -4389,7 +4408,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack; @@ -4405,7 +4424,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_OVResult(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; struct drbd_work *w; @@ -4449,14 +4468,14 @@ static int got_OVResult(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_skip(struct drbd_conf *mdev, enum 
drbd_packets cmd) +static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd) { return true; } struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, enum drbd_packets cmd); + int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4499,7 +4518,7 @@ int drbd_asender(struct drbd_thread *thi) int expect = sizeof(struct p_header); int ping_timeout_active = 0; int empty, pkt_size; - enum drbd_packets cmd_nr; + enum drbd_packet cmd_nr; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); From bdc7adb006c6213190eedb8567962ff3d41d226d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 17:49:00 +0100 Subject: [PATCH 044/609] drbd: Remove redundant initialization packet_seq is initialized by both sides of a connection in drbd_connect(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 81bd1f3b135..b86ef59b521 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2953,7 +2953,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); atomic_set(&mdev->unacked_cnt, 0); atomic_set(&mdev->local_cnt, 0); - atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); atomic_set(&mdev->pp_in_use_by_net, 0); atomic_set(&mdev->rs_sect_in, 0); From cc378270e4abb9273ce1641d30bf6e84248f7b2e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 18:01:50 +0100 Subject: [PATCH 045/609] drbd: Initialize the sequence number sent over the network even when not used Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 
b86ef59b521..701f231cf4b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2753,7 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; - /* p.seq_num = 0; No sequence numbers here.. */ + p.seq_num = 0; /* unused */ /* Only called by our kernel thread. * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL From 3e394da184ab32d2c345fd459e1eeb7b9586bb4e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 18:36:55 +0100 Subject: [PATCH 046/609] drbd: Move sequence number logic into drbd_receiver.c and simplify it These things are only used there. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 27 --------------------------- drivers/block/drbd/drbd_receiver.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9f5c13513d6..cb45ca10d4b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2355,33 +2355,6 @@ static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) return changed; } -static inline int seq_cmp(u32 a, u32 b) -{ - /* we assume wrap around at 32bit. - * for wrap around at 24bit (old atomic_t), - * we'd have to - * a <<= 8; b <<= 8; - */ - return (s32)(a) - (s32)(b); -} -#define seq_lt(a, b) (seq_cmp((a), (b)) < 0) -#define seq_gt(a, b) (seq_cmp((a), (b)) > 0) -#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) -#define seq_le(a, b) (seq_cmp((a), (b)) <= 0) -/* CAUTION: please no side effects in arguments! */ -#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? 
(a) : (b))) - -static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) -{ - unsigned int m; - spin_lock(&mdev->peer_seq_lock); - m = seq_max(mdev->peer_seq, new_seq); - mdev->peer_seq = m; - spin_unlock(&mdev->peer_seq_lock); - if (m == new_seq) - wake_up(&mdev->seq_wait); -} - static inline void drbd_update_congested(struct drbd_conf *mdev) { struct sock *sk = mdev->tconn->data.socket->sk; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 31f6875ceba..b4e1dab62dc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1621,6 +1621,33 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u return ok; } +static bool seq_greater(u32 a, u32 b) +{ + /* + * We assume 32-bit wrap-around here. + * For 24-bit wrap-around, we would have to shift: + * a <<= 8; b <<= 8; + */ + return (s32)a - (s32)b > 0; +} + +static u32 seq_max(u32 a, u32 b) +{ + return seq_greater(a, b) ? a : b; +} + +static void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) +{ + unsigned int m; + + spin_lock(&mdev->peer_seq_lock); + m = seq_max(mdev->peer_seq, new_seq); + mdev->peer_seq = m; + spin_unlock(&mdev->peer_seq_lock); + if (m == new_seq) + wake_up(&mdev->seq_wait); +} + /* Called from receive_Data. * Synchronize packets on sock with packets on msock. 
* @@ -1651,7 +1678,7 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) spin_lock(&mdev->peer_seq_lock); for (;;) { prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); - if (seq_le(packet_seq, mdev->peer_seq+1)) + if (!seq_greater(packet_seq, mdev->peer_seq + 1)) break; if (signal_pending(current)) { ret = -ERESTARTSYS; From 9e204cddaf76d19ce0e84f025b0946110694dbfb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 18:45:11 +0100 Subject: [PATCH 047/609] drbd: Move some functions to where they are used Move drbd_update_congested() to drbd_main.c, and drbd_req_new() and drbd_req_free() to drbd_req.c: those functions are not used anywhere else. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 ------- drivers/block/drbd/drbd_main.c | 7 +++++++ drivers/block/drbd/drbd_req.c | 29 +++++++++++++++++++++++++++++ drivers/block/drbd/drbd_req.h | 26 -------------------------- 4 files changed, 36 insertions(+), 33 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index cb45ca10d4b..7922fa0403d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2355,13 +2355,6 @@ static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) return changed; } -static inline void drbd_update_congested(struct drbd_conf *mdev) -{ - struct sock *sk = mdev->tconn->data.socket->sk; - if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->flags); -} - static inline int drbd_queue_order_type(struct drbd_conf *mdev) { /* sorry, we currently have no working implementation diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 701f231cf4b..5da1df023a4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2528,6 +2528,13 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * return drop_it; /* && (mdev->state == 
R_PRIMARY) */; } +static void drbd_update_congested(struct drbd_conf *mdev) +{ + struct sock *sk = mdev->tconn->data.socket->sk; + if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) + set_bit(NET_CONGESTED, &mdev->flags); +} + /* The idea of sendpage seems to be to put some kind of reference * to the page into the skb, and to hand it over to the NIC. In * this process get_page() gets called. diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 74179f7986e..25fa87c95a1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -56,6 +56,35 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) part_stat_unlock(); } +static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, + struct bio *bio_src) +{ + struct drbd_request *req; + + req = mempool_alloc(drbd_request_mempool, GFP_NOIO); + if (!req) + return NULL; + + drbd_req_make_private_bio(req, bio_src); + req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; + req->mdev = mdev; + req->master_bio = bio_src; + req->epoch = 0; + drbd_clear_interval(&req->i); + req->i.sector = bio_src->bi_sector; + req->i.size = bio_src->bi_size; + INIT_LIST_HEAD(&req->tl_requests); + INIT_LIST_HEAD(&req->w.list); + + return req; +} + +static void drbd_req_free(struct drbd_request *req) +{ + mempool_free(req, drbd_request_mempool); +} + +/* rw is bio_data_dir(), only READ or WRITE */ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) { const unsigned long s = req->rq_state; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 4b0858bf286..431e3f962c3 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -234,32 +234,6 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi bio->bi_next = NULL; } -static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, - struct bio *bio_src) -{ - struct drbd_request *req = - 
mempool_alloc(drbd_request_mempool, GFP_NOIO); - if (likely(req)) { - drbd_req_make_private_bio(req, bio_src); - - req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; - req->mdev = mdev; - req->master_bio = bio_src; - req->epoch = 0; - req->i.sector = bio_src->bi_sector; - req->i.size = bio_src->bi_size; - drbd_clear_interval(&req->i); - INIT_LIST_HEAD(&req->tl_requests); - INIT_LIST_HEAD(&req->w.list); - } - return req; -} - -static inline void drbd_req_free(struct drbd_request *req) -{ - mempool_free(req, drbd_request_mempool); -} - /* Short lived temporary struct on the stack. * We could squirrel the error to be returned into * bio->bi_size, or similar. But that would be too ugly. */ From a500c2efbbb3a57f83e18382e927b18513aca4cd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Jan 2011 14:12:23 +0100 Subject: [PATCH 048/609] drbd: struct drbd_request: Introduce a new collision flag This flag is set when a process puts itself to sleep to wait for a conflicting request to complete.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 7 +++-- drivers/block/drbd/drbd_req.c | 42 +++--------------------------- drivers/block/drbd/drbd_req.h | 7 +++++ 3 files changed, 15 insertions(+), 41 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b4e1dab62dc..d9f3f7fd9bb 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1815,6 +1815,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, first = 1; for (;;) { struct drbd_interval *i; + struct drbd_request *req2; int have_unacked = 0; int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, @@ -1822,8 +1823,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { - struct drbd_request *req2 = - container_of(i, struct drbd_request, i); + req2 = container_of(i, struct drbd_request, i); /* only ALERT on first iteration, * we may be woken up early... */ @@ -1869,6 +1869,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, goto out_interrupted; } + /* Indicate to wake up mdev->misc_wait upon completion. 
*/ + req2->rq_state |= RQ_COLLISION; + spin_unlock_irq(&mdev->tconn->req_lock); if (first) { first = 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 25fa87c95a1..8b4ba94538b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -176,45 +176,9 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, req->epoch == mdev->tconn->newest_tle->br_number) queue_barrier(mdev); - /* we need to do the conflict detection stuff, - * if the epoch_entries tree is non-empty and - * this request has completed on the network */ - if ((s & RQ_NET_DONE) && !RB_EMPTY_ROOT(&mdev->epoch_entries)) { - const sector_t sector = req->i.sector; - const int size = req->i.size; - struct drbd_interval *i; - - /* ASSERT: - * there must be no conflicting requests, since - * they must have been failed on the spot */ - - i = drbd_find_overlap(&mdev->write_requests, sector, size); - if (i) { - struct drbd_request *req2 = - container_of(i, struct drbd_request, i); - - dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " - "other: %p %llus +%u\n", - req, (unsigned long long)sector, size, - i, (unsigned long long)req2->i.sector, req2->i.size); - } - - /* maybe "wake" those conflicting epoch entries - * that wait for this request to finish. - * - * currently, there can be only _one_ such ee - * (well, or some more, which would be pending - * P_DISCARD_ACK not yet sent by the asender...), - * since we block the receiver thread upon the - * first conflict detection, which will wait on - * misc_wait. maybe we want to assert that? - * - * anyways, if we found one, - * we just have to do a wake_up. */ - i = drbd_find_overlap(&mdev->epoch_entries, sector, size); - if (i) - wake_up(&mdev->misc_wait); - } + /* Wake up any processes waiting for this request to complete. 
*/ + if ((s & RQ_NET_DONE) && (s & RQ_COLLISION)) + wake_up(&mdev->misc_wait); } void complete_master_bio(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 431e3f962c3..7a7464a2b3a 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -194,6 +194,12 @@ enum drbd_req_state_bits { /* Should call drbd_al_complete_io() for this request... */ __RQ_IN_ACT_LOG, + + /* + * Set when a processes puts itself to sleep to wait for this request + * to complete. + */ + __RQ_COLLISION, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -214,6 +220,7 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) +#define RQ_COLLISION (1UL << __RQ_COLLISION) /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ From 3e05146f0a9f28ef5959403eabf3239869476315 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Jan 2011 16:20:57 +0100 Subject: [PATCH 049/609] drbd: Remove redundant check from drbd_contains_interval() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index b77a9bda03d..0d17eaa89a6 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -99,7 +99,7 @@ drbd_contains_interval(struct rb_root *root, sector_t sector, else if (interval > here) node = node->rb_right; else - return interval->sector == sector; + return true; } return false; } From 53840641bb1feff8c08acdba9de4c0f8b8674df5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 10:31:04 +0100 Subject: [PATCH 050/609] drbd: Allow to wait for the completion of an epoch entry as well Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- 
drivers/block/drbd/drbd_interval.h | 1 + drivers/block/drbd/drbd_receiver.c | 35 +++++++++++++++++++----------- drivers/block/drbd/drbd_req.c | 23 +++++++++++++++----- drivers/block/drbd/drbd_req.h | 7 ------ 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index a847b4a07b2..9d1e5eb2d7e 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -9,6 +9,7 @@ struct drbd_interval { sector_t sector; /* start sector of the interval */ unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ + int waiting:1; }; static inline void drbd_clear_interval(struct drbd_interval *i) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d9f3f7fd9bb..b84a9c9fd3f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -334,13 +334,15 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, goto fail; drbd_clear_interval(&e->i); + e->i.size = data_size; + e->i.sector = sector; + e->i.waiting = false; + e->epoch = NULL; e->mdev = mdev; e->pages = page; atomic_set(&e->pending_bios, 0); - e->i.size = data_size; e->flags = 0; - e->i.sector = sector; /* * The block_id is opaque to the receiver. It is not endianness * converted, and sent back to the sender unchanged. @@ -1172,6 +1174,19 @@ fail: return err; } +static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, + struct drbd_epoch_entry *e) +{ + struct drbd_interval *i = &e->i; + + drbd_remove_interval(&mdev->write_requests, i); + drbd_clear_interval(i); + + /* Wake up any processes waiting for this epoch entry to complete. 
*/ + if (i->waiting) + wake_up(&mdev->misc_wait); +} + static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { @@ -1591,8 +1606,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); } else D_ASSERT(drbd_interval_empty(&e->i)); @@ -1612,8 +1626,7 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); dec_unacked(mdev); @@ -1860,17 +1873,14 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } if (signal_pending(current)) { - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); - + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); - finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; } /* Indicate to wake up mdev->misc_wait upon completion. 
*/ - req2->rq_state |= RQ_COLLISION; + i->waiting = true; spin_unlock_irq(&mdev->tconn->req_lock); if (first) { @@ -1922,8 +1932,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->i.sector); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8b4ba94538b..078f77ba68f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -70,9 +70,12 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->mdev = mdev; req->master_bio = bio_src; req->epoch = 0; + drbd_clear_interval(&req->i); req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; + req->i.waiting = false; + INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); @@ -175,10 +178,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, (s & RQ_NET_SENT) != 0 && req->epoch == mdev->tconn->newest_tle->br_number) queue_barrier(mdev); - - /* Wake up any processes waiting for this request to complete. */ - if ((s & RQ_NET_DONE) && (s & RQ_COLLISION)) - wake_up(&mdev->misc_wait); } void complete_master_bio(struct drbd_conf *mdev, @@ -188,6 +187,20 @@ void complete_master_bio(struct drbd_conf *mdev, dec_ap_bio(mdev); } + +static void drbd_remove_request_interval(struct rb_root *root, + struct drbd_request *req) +{ + struct drbd_conf *mdev = req->mdev; + struct drbd_interval *i = &req->i; + + drbd_remove_interval(root, i); + + /* Wake up any processes waiting for this request to complete. */ + if (i->waiting) + wake_up(&mdev->misc_wait); +} + /* Helper for __req_mod(). 
* Set m->bio to the master bio, if it is fit to be completed, * or leave it alone (it is initialized to NULL in __req_mod), @@ -251,7 +264,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) root = &mdev->write_requests; else root = &mdev->read_requests; - drbd_remove_interval(root, &req->i); + drbd_remove_request_interval(root, req); } else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 7a7464a2b3a..431e3f962c3 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -194,12 +194,6 @@ enum drbd_req_state_bits { /* Should call drbd_al_complete_io() for this request... */ __RQ_IN_ACT_LOG, - - /* - * Set when a processes puts itself to sleep to wait for this request - * to complete. - */ - __RQ_COLLISION, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -220,7 +214,6 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) -#define RQ_COLLISION (1UL << __RQ_COLLISION) /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ From 5e4722645afb27ee749ea65988544450f08f78ba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Jan 2011 14:42:51 +0100 Subject: [PATCH 051/609] drbd: _req_conflicts(): Get rid of the epoch_entries tree Instead of keeping a separate tree for local and remote write requests for finding requests and for conflict detection, use the same tree for both purposes. Introduce a flag to allow distinguishing the two possible types of entries in this tree. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 --- drivers/block/drbd/drbd_interval.h | 1 + drivers/block/drbd/drbd_main.c | 1 - drivers/block/drbd/drbd_receiver.c | 33 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 28 ++++--------------------- drivers/block/drbd/drbd_worker.c | 2 +- 6 files changed, 22 insertions(+), 46 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7922fa0403d..7fcda713714 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1045,9 +1045,6 @@ struct drbd_conf { struct list_head read_ee; /* IO in progress (any read) */ struct list_head net_ee; /* zero-copy network send in progress */ - /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */ - struct rb_root epoch_entries; - /* this one is protected by ee_lock, single thread */ struct drbd_epoch_entry *last_write_w_barrier; diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index 9d1e5eb2d7e..4010ad92394 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -9,6 +9,7 @@ struct drbd_interval { sector_t sector; /* start sector of the interval */ unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ + int local:1 /* local or remote request? 
*/; int waiting:1; }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5da1df023a4..8c77476825e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3450,7 +3450,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) goto out_no_tl; mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; - mdev->epoch_entries = RB_ROOT; mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); if (!mdev->current_epoch) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b84a9c9fd3f..b063ca23446 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -336,6 +336,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, drbd_clear_interval(&e->i); e->i.size = data_size; e->i.sector = sector; + e->i.local = false; e->i.waiting = false; e->epoch = NULL; @@ -1508,7 +1509,7 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, /* Request object according to our peer */ req = (struct drbd_request *)(unsigned long)id; - if (drbd_contains_interval(root, sector, &req->i)) + if (drbd_contains_interval(root, sector, &req->i) && req->i.local) return req; if (!missing_ok) { dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func, @@ -1788,17 +1789,12 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, /* conflict detection and handling: * 1. wait on the sequence number, * in case this data packet overtook ACK packets. - * 2. check our interval trees for conflicting requests: - * we only need to check the write_requests tree; the - * epoch_entries tree cannot contain any overlaps because - * they were already eliminated on the submitting node. + * 2. check for conflicting write requests. * * Note: for two_primaries, we are protocol C, * so there cannot be any request that is DONE * but still on the transfer log. * - * unconditionally add to the epoch_entries tree. 
- * * if no conflicting request is found: * submit. * @@ -1823,12 +1819,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, spin_lock_irq(&mdev->tconn->req_lock); - drbd_insert_interval(&mdev->epoch_entries, &e->i); - first = 1; for (;;) { struct drbd_interval *i; - struct drbd_request *req2; int have_unacked = 0; int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, @@ -1836,18 +1829,23 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { - req2 = container_of(i, struct drbd_request, i); - /* only ALERT on first iteration, * we may be woken up early... */ if (first) - dev_alert(DEV, "%s[%u] Concurrent local write detected!" + dev_alert(DEV, "%s[%u] Concurrent %s write detected!" " new: %llus +%u; pending: %llus +%u\n", current->comm, current->pid, + i->local ? "local" : "remote", (unsigned long long)sector, size, - (unsigned long long)req2->i.sector, req2->i.size); - if (req2->rq_state & RQ_NET_PENDING) - ++have_unacked; + (unsigned long long)i->sector, i->size); + + if (i->local) { + struct drbd_request *req2; + + req2 = container_of(i, struct drbd_request, i); + if (req2->rq_state & RQ_NET_PENDING) + ++have_unacked; + } ++have_conflict; } if (!have_conflict) @@ -1873,7 +1871,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } if (signal_pending(current)) { - drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; @@ -1896,6 +1893,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, spin_lock_irq(&mdev->tconn->req_lock); } finish_wait(&mdev->misc_wait, &wait); + + drbd_insert_interval(&mdev->write_requests, &e->i); } list_add(&e->w.list, &mdev->active_ee); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 078f77ba68f..df5f1062732 100644 --- a/drivers/block/drbd/drbd_req.c +++ 
b/drivers/block/drbd/drbd_req.c @@ -74,6 +74,7 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, drbd_clear_interval(&req->i); req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; + req->i.local = true; req->i.waiting = false; INIT_LIST_HEAD(&req->tl_requests); @@ -317,8 +318,6 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e * to happen, but this is the rationale why we also have to check for * conflicting requests with local origin, and why we have to do so regardless * of whether we allowed multiple primaries. - * - * In case we only have one primary, the epoch_entries tree is empty. */ static int _req_conflicts(struct drbd_request *req) { @@ -334,35 +333,16 @@ static int _req_conflicts(struct drbd_request *req) i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { - struct drbd_request *req2 = - container_of(i, struct drbd_request, i); - - dev_alert(DEV, "%s[%u] Concurrent local write detected! " + dev_alert(DEV, "%s[%u] Concurrent %s write detected! " "[DISCARD L] new: %llus +%u; " "pending: %llus +%u\n", current->comm, current->pid, + i->local ? "local" : "remote", (unsigned long long)sector, size, - (unsigned long long)req2->i.sector, req2->i.size); + (unsigned long long)i->sector, i->size); goto out_conflict; } - if (!RB_EMPTY_ROOT(&mdev->epoch_entries)) { - /* check for overlapping requests with remote origin */ - i = drbd_find_overlap(&mdev->epoch_entries, sector, size); - if (i) { - struct drbd_epoch_entry *e = - container_of(i, struct drbd_epoch_entry, i); - - dev_alert(DEV, "%s[%u] Concurrent remote write detected!" - " [DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)e->i.sector, e->i.size); - goto out_conflict; - } - } - /* this is like it should be, and what we expected. * our users do behave after all... 
*/ put_net_conf(mdev->tconn); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index afad8ea4d88..0359600f563 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -123,7 +123,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_add_tail(&e->w.list, &mdev->done_ee); /* - * Do not remove from the epoch_entries tree here: we did not send the + * Do not remove from the write_requests tree here: we did not send the * Ack yet and did not wake possibly waiting conflicting requests. * Removed from the tree from "drbd_process_done_ee" within the * appropriate w.cb (e_end_block/e_end_resync_block) or from From ddd8877d3169ebda7272667fc3dc9768204a157f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 14:24:05 +0100 Subject: [PATCH 052/609] drbd: Remove unnecessary reference counting left-over Nothing in this function accesses mdev->tconn->net_conf, so there is no need for get_net_conf() / put_net_conf() anymore. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index df5f1062732..e11ea475a4a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -328,9 +328,6 @@ static int _req_conflicts(struct drbd_request *req) D_ASSERT(drbd_interval_empty(&req->i)); - if (!get_net_conf(mdev->tconn)) - return 0; - i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { dev_alert(DEV, "%s[%u] Concurrent %s write detected! " @@ -340,17 +337,9 @@ static int _req_conflicts(struct drbd_request *req) i->local ? "local" : "remote", (unsigned long long)sector, size, (unsigned long long)i->sector, i->size); - goto out_conflict; + return 1; } - - /* this is like it should be, and what we expected. - * our users do behave after all... 
*/ - put_net_conf(mdev->tconn); return 0; - -out_conflict: - put_net_conf(mdev->tconn); - return 1; } /* obviously this could be coded as many single functions From 6024fece739518c4c101c767d527fd624b096a34 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 15:53:51 +0100 Subject: [PATCH 053/609] drbd: Defer new writes when detecting conflicting writes Before submitting a new local write request, wait for any conflicting local or remote requests to complete. We could assume that the new request occurred first and that the conflicting requests overwrote it (and therefore discard the new request), but we know for sure that the new request occurred after the conflicting requests and so this behavior would be weird. We would also end up with the wrong result if the new request is not fully contained within the conflicting requests. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 103 +++++++++++++--------------------- 1 file changed, 39 insertions(+), 64 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e11ea475a4a..6bcf4171a76 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -300,48 +300,6 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e _req_may_be_done(req, m); } -/* - * checks whether there was an overlapping request - * or ee already registered. - * - * if so, return 1, in which case this request is completed on the spot, - * without ever being submitted or send. - * - * return 0 if it is ok to submit this request. - * - * NOTE: - * paranoia: assume something above us is broken, and issues different write - * requests for the same block simultaneously... - * - * To ensure these won't be reordered differently on both nodes, resulting in - * diverging data sets, we discard the later one(s).
Not that this is supposed - * to happen, but this is the rationale why we also have to check for - * conflicting requests with local origin, and why we have to do so regardless - * of whether we allowed multiple primaries. - */ -static int _req_conflicts(struct drbd_request *req) -{ - struct drbd_conf *mdev = req->mdev; - const sector_t sector = req->i.sector; - const int size = req->i.size; - struct drbd_interval *i; - - D_ASSERT(drbd_interval_empty(&req->i)); - - i = drbd_find_overlap(&mdev->write_requests, sector, size); - if (i) { - dev_alert(DEV, "%s[%u] Concurrent %s write detected! " - "[DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - i->local ? "local" : "remote", - (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); - return 1; - } - return 0; -} - /* obviously this could be coded as many single functions * instead of one huge switch, * or by putting the code directly in the respective locations @@ -721,6 +679,34 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); } +/* + * complete_conflicting_writes - wait for any conflicting write requests + * + * The write_requests tree contains all active write requests which we + * currently know about. Wait for any requests to complete which conflict with + * the new one. 
+ */ +static int complete_conflicting_writes(struct drbd_conf *mdev, + sector_t sector, int size) +{ + for(;;) { + DEFINE_WAIT(wait); + struct drbd_interval *i; + + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (!i) + return 0; + i->waiting = true; + prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&mdev->tconn->req_lock); + schedule(); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->tconn->req_lock); + if (signal_pending(current)) + return -ERESTARTSYS; + } +} + static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); @@ -729,7 +715,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns struct drbd_tl_epoch *b = NULL; struct drbd_request *req; int local, remote, send_oos = 0; - int err = -EIO; + int err; int ret = 0; /* allocate outside of all locks; */ @@ -799,6 +785,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns if (!(local || remote) && !is_susp(mdev->state)) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + err = -EIO; goto fail_free_complete; } @@ -823,6 +810,14 @@ allocate_barrier: /* GOOD, everything prepared, grab the spin_lock */ spin_lock_irq(&mdev->tconn->req_lock); + if (rw == WRITE) { + err = complete_conflicting_writes(mdev, sector, size); + if (err) { + spin_unlock_irq(&mdev->tconn->req_lock); + goto fail_free_complete; + } + } + if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of generic_make_request() to restart processing of this @@ -843,6 +838,7 @@ allocate_barrier: if (!(local || remote)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); spin_unlock_irq(&mdev->tconn->req_lock); + err = -EIO; goto fail_free_complete; } } @@ -903,12 +899,6 @@ allocate_barrier: if (local) _req_mod(req, TO_BE_SUBMITTED); - /* check this request on the collision 
detection hash tables. - * if we have a conflict, just complete it here. - * THINK do we want to check reads, too? (I don't think so...) */ - if (rw == WRITE && _req_conflicts(req)) - goto fail_conflicting; - list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests); /* NOTE remote first: to get the concurrent write detection right, @@ -975,21 +965,6 @@ allocate_barrier: return 0; -fail_conflicting: - /* this is a conflicting request. - * even though it may have been only _partially_ - * overlapping with one of the currently pending requests, - * without even submitting or sending it, we will - * pretend that it was successfully served right now. - */ - _drbd_end_io_acct(mdev, req); - spin_unlock_irq(&mdev->tconn->req_lock); - if (remote) - dec_ap_pending(mdev); - /* THINK: do we want to fail it (-EIO), or pretend success? - * this pretends success. */ - err = 0; - fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, sector); From 43ae077d0a1e98dd13112646fe967565febf4fe7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 3 Feb 2011 18:42:08 +0100 Subject: [PATCH 054/609] drbd: Make the peer_seq updating code more obvious Make it more clear that update_peer_seq() is supposed to wake up the seq_wait queue whenever the sequence number changes. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b063ca23446..bc5351df807 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1650,15 +1650,15 @@ static u32 seq_max(u32 a, u32 b) return seq_greater(a, b) ? 
a : b; } -static void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) +static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) { - unsigned int m; + unsigned int old_peer_seq; spin_lock(&mdev->peer_seq_lock); - m = seq_max(mdev->peer_seq, new_seq); - mdev->peer_seq = m; + old_peer_seq = mdev->peer_seq; + mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); spin_unlock(&mdev->peer_seq_lock); - if (m == new_seq) + if (old_peer_seq != peer_seq) wake_up(&mdev->seq_wait); } From 70b1987663851f4431a2f43d8ccefb7b6ac73331 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 12:11:05 +0100 Subject: [PATCH 055/609] drbd: Improve the drbd_find_overlap() documentation Describe how to reach any further overlapping intervals from the first overlap found. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 0d17eaa89a6..14dbe2dd2d3 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -122,9 +122,11 @@ drbd_remove_interval(struct rb_root *root, struct drbd_interval *this) * @sector: start sector * @size: size, aligned to 512 bytes * - * Returns the interval overlapping with [sector, sector + size), or NULL. - * When there is more than one overlapping interval in the tree, the interval - * with the lowest start sector is returned. + * Returns an interval overlapping with [sector, sector + size), or NULL if + * there is none. When there is more than one overlapping interval in the + * tree, the interval with the lowest start sector is returned, and all other + * overlapping intervals will be on the right side of the tree, reachable with + * rb_next(). 
*/ struct drbd_interval * drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) From c6f7df42c9ceddc5ef582f6044b15e50e6eeb053 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:10:57 +0100 Subject: [PATCH 056/609] drbd: Remove unused variable in struct drbd_conf Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7fcda713714..768656a1e82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1045,9 +1045,6 @@ struct drbd_conf { struct list_head read_ee; /* IO in progress (any read) */ struct list_head net_ee; /* zero-copy network send in progress */ - /* this one is protected by ee_lock, single thread */ - struct drbd_epoch_entry *last_write_w_barrier; - int next_barrier_nr; struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ From f6ffca9f42902556bcf72426d2d0714bdbfdbe09 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:30:34 +0100 Subject: [PATCH 057/609] drbd: Rename struct drbd_epoch_entry to struct drbd_peer_request Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 37 +++++++++++------------ drivers/block/drbd/drbd_main.c | 8 ++--- drivers/block/drbd/drbd_nl.c | 7 ++--- drivers/block/drbd/drbd_receiver.c | 48 +++++++++++++++--------------- drivers/block/drbd/drbd_worker.c | 25 ++++++++-------- 5 files changed, 61 insertions(+), 64 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 768656a1e82..696ff3cdb2f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -707,7 +707,7 @@ struct digest_info { void *digest; }; -struct drbd_epoch_entry { +struct drbd_peer_request { struct drbd_work w; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; @@ 
-1194,8 +1194,8 @@ extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); -extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e); +extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, + struct drbd_peer_request *); extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_block_req *rp); extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, @@ -1203,8 +1203,8 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id); extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); -extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e); +extern int drbd_send_block(struct drbd_conf *, enum drbd_packet, + struct drbd_peer_request *); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id); @@ -1500,7 +1500,8 @@ static inline void ov_oos_print(struct drbd_conf *mdev) extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); -extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_epoch_entry *, void *); +extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, + struct drbd_peer_request *, void *); /* worker callbacks */ extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); @@ -1527,16 +1528,14 @@ extern void start_resync_timer_fn(unsigned long data); /* drbd_receiver.c */ extern int 
drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); -extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, - const unsigned rw, const int fault_type); +extern int drbd_submit_ee(struct drbd_conf *, struct drbd_peer_request *, + const unsigned, const int); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); -extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, - u64 id, - sector_t sector, - unsigned int data_size, - gfp_t gfp_mask) __must_hold(local); -extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, - int is_net); +extern struct drbd_peer_request *drbd_alloc_ee(struct drbd_conf *, + u64, sector_t, unsigned int, + gfp_t) __must_hold(local); +extern void drbd_free_some_ee(struct drbd_conf *, struct drbd_peer_request *, + int); #define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0) #define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1) extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, @@ -1627,10 +1626,8 @@ void drbd_nl_cleanup(void); int __init drbd_nl_init(void); void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); void drbd_bcast_sync_progress(struct drbd_conf *mdev); -void drbd_bcast_ee(struct drbd_conf *mdev, - const char *reason, const int dgs, - const char* seen_hash, const char* calc_hash, - const struct drbd_epoch_entry* e); +void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *, + const char *, const struct drbd_peer_request *); /** @@ -1713,7 +1710,7 @@ static inline int drbd_bio_has_active_page(struct bio *bio) return 0; } -static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) +static inline int drbd_ee_has_active_page(struct drbd_peer_request *e) { struct page *page = e->pages; page_chain_for_each(page) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8c77476825e..62f20dfc9b5 100644 --- a/drivers/block/drbd/drbd_main.c +++ 
b/drivers/block/drbd/drbd_main.c @@ -2434,7 +2434,7 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, * @e: Epoch entry. */ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e) + struct drbd_peer_request *e) { return _drbd_send_ack(mdev, cmd, cpu_to_be64(e->i.sector), @@ -2641,7 +2641,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) return 1; } -static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_peer_request *e) { struct page *page = e->pages; unsigned len = e->i.size; @@ -2747,7 +2747,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e) + struct drbd_peer_request *e) { int ok; struct p_data p; @@ -3147,7 +3147,7 @@ static int drbd_create_mempools(void) goto Enomem; drbd_ee_cache = kmem_cache_create( - "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL); + "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL); if (drbd_ee_cache == NULL) goto Enomem; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 8b8894e10e6..ee00ffa0465 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2443,10 +2443,9 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } -void drbd_bcast_ee(struct drbd_conf *mdev, - const char *reason, const int dgs, - const char* seen_hash, const char* calc_hash, - const struct drbd_epoch_entry* e) +void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, + const char *seen_hash, const char *calc_hash, + const struct drbd_peer_request *e) { struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c index bc5351df807..e061aca2d93 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -189,7 +189,7 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct list_head *le, *tle; /* The EEs are always appended to the end of the list. Since @@ -198,7 +198,7 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed stop to examine the list... */ list_for_each_safe(le, tle, &mdev->net_ee) { - e = list_entry(le, struct drbd_epoch_entry, w.list); + e = list_entry(le, struct drbd_peer_request, w.list); if (drbd_ee_has_active_page(e)) break; list_move(le, to_be_freed); @@ -208,7 +208,7 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) { LIST_HEAD(reclaimed); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *e, *t; spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); @@ -309,13 +309,11 @@ You must not have the req_lock: drbd_wait_ee_list_empty() */ -struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, - u64 id, - sector_t sector, - unsigned int data_size, - gfp_t gfp_mask) __must_hold(local) +struct drbd_peer_request * +drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, + unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct page *page; unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; @@ -357,7 +355,8 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, return NULL; } -void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net) +void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, + int is_net) { if 
(e->flags & EE_HAS_DIGEST) kfree(e->digest); @@ -370,7 +369,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) { LIST_HEAD(work_list); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *e, *t; int count = 0; int is_net = list == &mdev->net_ee; @@ -399,7 +398,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) { LIST_HEAD(work_list); LIST_HEAD(reclaimed); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *e, *t; int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); spin_lock_irq(&mdev->tconn->req_lock); @@ -1100,8 +1099,8 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) * on certain Xen deployments. */ /* TODO allocate from our own bio_set. */ -int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, - const unsigned rw, const int fault_type) +int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, + const unsigned rw, const int fault_type) { struct bio *bios = NULL; struct bio *bio; @@ -1176,7 +1175,7 @@ fail: } static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, - struct drbd_epoch_entry *e) + struct drbd_peer_request *e) { struct drbd_interval *i = &e->i; @@ -1262,11 +1261,12 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, /* used from receive_RSDataReply (recv_resync_read) * and from receive_Data */ -static struct drbd_epoch_entry * -read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) +static struct drbd_peer_request * +read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, + int data_size) __must_hold(local) { const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct page *page; int dgs, ds, rr; void *dig_in = mdev->tconn->int_dig_in; @@ -1445,7 +1445,7 @@ static int recv_dless_read(struct drbd_conf 
*mdev, struct drbd_request *req, * drbd_process_done_ee() by asender only */ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + struct drbd_peer_request *e = (struct drbd_peer_request *)w; sector_t sector = e->i.sector; int ok; @@ -1467,7 +1467,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; e = read_in_block(mdev, ID_SYNCER, sector, data_size); if (!e) @@ -1582,7 +1582,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, */ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + struct drbd_peer_request *e = (struct drbd_peer_request *)w; sector_t sector = e->i.sector; int ok = 1, pcmd; @@ -1619,7 +1619,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + struct drbd_peer_request *e = (struct drbd_peer_request *)w; int ok = 1; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); @@ -1731,7 +1731,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { sector_t sector; - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct p_data *p = &mdev->tconn->data.rbuf.data; int rw = WRITE; u32 dp_flags; @@ -2015,7 +2015,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct digest_info *di = NULL; int size, verb; unsigned int fault_type; diff 
--git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0359600f563..06628d1504b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -80,7 +80,7 @@ void drbd_md_io_complete(struct bio *bio, int error) /* reads on behalf of the partner, * "submitted" by the receiver */ -void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) +void drbd_endio_read_sec_final(struct drbd_peer_request *e) __releases(local) { unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; @@ -100,7 +100,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver, final stage. */ -static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local) +static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(local) { unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; @@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo */ void drbd_endio_sec(struct bio *bio, int error) { - struct drbd_epoch_entry *e = bio->bi_private; + struct drbd_peer_request *e = bio->bi_private; struct drbd_conf *mdev = e->mdev; int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; @@ -247,7 +247,8 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return w_send_read_req(mdev, w, 0); } -void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) +void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, + struct drbd_peer_request *e, void *digest) { struct hash_desc desc; struct scatterlist sg; @@ -297,7 +298,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * /* TODO merge common code with w_e_end_ov_req */ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - 
struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); int digest_size; void *digest; int ok = 1; @@ -344,7 +345,7 @@ out: static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; if (!get_ldev(mdev)) return -EIO; @@ -900,7 +901,7 @@ out: } /* helper */ -static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *e) { if (drbd_ee_has_active_page(e)) { /* This might happen if sendpage() has not finished */ @@ -923,7 +924,7 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent */ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { @@ -959,7 +960,7 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) */ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { @@ -1007,7 +1008,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); struct digest_info *di; int digest_size; void *digest = NULL; @@ -1070,7 +1071,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* TODO merge common code with w_e_send_csum */ int 
w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); sector_t sector = e->i.sector; unsigned int size = e->i.size; int digest_size; @@ -1127,7 +1128,7 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); struct digest_info *di; void *digest; sector_t sector = e->i.sector; From 18b75d756bdd6e87e5c4a46d6d1f279077425dae Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:36:22 +0100 Subject: [PATCH 058/609] drbd: Clean up some left-overs Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 696ff3cdb2f..e6cc6301db0 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -668,15 +668,6 @@ struct drbd_tl_epoch { int n_writes; /* number of requests attached before this barrier */ }; -struct drbd_request; - -/* These Tl_epoch_entries may be in one of 6 lists: - active_ee .. data packet being written - sync_ee .. syncer block being written - done_ee .. block written, need to send P_WRITE_ACK - read_ee .. 
[RS]P_DATA_REQUEST being read -*/ - struct drbd_epoch { struct list_head list; unsigned int barrier_nr; @@ -1041,8 +1032,8 @@ struct drbd_conf { enum write_ordering_e write_ordering; struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */ struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ - struct list_head done_ee; /* send ack */ - struct list_head read_ee; /* IO in progress (any read) */ + struct list_head done_ee; /* need to send P_WRITE_ACK */ + struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */ struct list_head net_ee; /* zero-copy network send in progress */ int next_barrier_nr; From 6c852beca185b18e89ad7783ab15793c0911f86b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:38:52 +0100 Subject: [PATCH 059/609] drbd: Update some comments Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e6cc6301db0..c7504579c46 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -727,7 +727,7 @@ enum { * we need to resubmit without the barrier flag. */ __EE_RESUBMITTED, - /* we may have several bios per epoch entry. + /* we may have several bios per peer request. 
* if any of those fail, we set this flag atomically * from the endio callback */ __EE_WAS_ERROR, @@ -1422,7 +1422,7 @@ extern void drbd_bm_unlock(struct drbd_conf *mdev); /* drbd_main.c */ extern struct kmem_cache *drbd_request_cache; -extern struct kmem_cache *drbd_ee_cache; /* epoch entries */ +extern struct kmem_cache *drbd_ee_cache; /* peer requests */ extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t *drbd_request_mempool; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 62f20dfc9b5..3bc900f48f9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -135,7 +135,7 @@ struct drbd_conf **minor_table; struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache *drbd_request_cache; -struct kmem_cache *drbd_ee_cache; /* epoch entries */ +struct kmem_cache *drbd_ee_cache; /* peer requests */ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e061aca2d93..6ba94febfab 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1085,7 +1085,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) /** * drbd_submit_ee() * @mdev: DRBD device. - * @e: epoch entry + * @e: peer request * @rw: flag field, see bio->bi_rw * * May spread the pages to multiple bios, @@ -1182,7 +1182,7 @@ static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, drbd_remove_interval(&mdev->write_requests, i); drbd_clear_interval(i); - /* Wake up any processes waiting for this epoch entry to complete. */ + /* Wake up any processes waiting for this peer request to complete. 
*/ if (i->waiting) wake_up(&mdev->misc_wait); } From db830c464b69e26ea4d371e38bb2320c99c82f41 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:57:48 +0100 Subject: [PATCH 060/609] drbd: Local variable renames: e -> peer_req Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 36 ++-- drivers/block/drbd/drbd_nl.c | 18 +- drivers/block/drbd/drbd_receiver.c | 254 ++++++++++++++--------------- drivers/block/drbd/drbd_worker.c | 197 +++++++++++----------- 5 files changed, 258 insertions(+), 251 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c7504579c46..302ccc6d943 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1701,9 +1701,9 @@ static inline int drbd_bio_has_active_page(struct bio *bio) return 0; } -static inline int drbd_ee_has_active_page(struct drbd_peer_request *e) +static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) { - struct page *page = e->pages; + struct page *page = peer_req->pages; page_chain_for_each(page) { if (page_count(page) > 1) return 1; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 3bc900f48f9..7728d161340 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2429,17 +2429,17 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, /** * drbd_send_ack() - Sends an ack packet - * @mdev: DRBD device. - * @cmd: Packet command code. - * @e: Epoch entry. 
+ * @mdev: DRBD device + * @cmd: packet command code + * @peer_req: peer request */ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_peer_request *e) + struct drbd_peer_request *peer_req) { return _drbd_send_ack(mdev, cmd, - cpu_to_be64(e->i.sector), - cpu_to_be32(e->i.size), - e->block_id); + cpu_to_be64(peer_req->i.sector), + cpu_to_be32(peer_req->i.size), + peer_req->block_id); } /* This function misuses the block_id field to signal if the blocks @@ -2641,10 +2641,12 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) return 1; } -static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_peer_request *e) +static int _drbd_send_zc_ee(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req) { - struct page *page = e->pages; - unsigned len = e->i.size; + struct page *page = peer_req->pages; + unsigned len = peer_req->i.size; + /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); @@ -2747,7 +2749,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_peer_request *e) + struct drbd_peer_request *peer_req) { int ok; struct p_data p; @@ -2757,9 +2759,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - p.sector = cpu_to_be64(e->i.sector); - p.block_id = e->block_id; + prepare_header(mdev, &p.head, cmd, sizeof(p) - + sizeof(struct p_header80) + + dgs + peer_req->i.size); + p.sector = cpu_to_be64(peer_req->i.sector); + p.block_id = peer_req->block_id; p.seq_num = 0; /* unused */ /* Only called by our kernel thread. 
@@ -2772,11 +2776,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->tconn->int_dig_out; - drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, e, dgb); + drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb); ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) - ok = _drbd_send_zc_ee(mdev, e); + ok = _drbd_send_zc_ee(mdev, peer_req); drbd_put_data_sock(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ee00ffa0465..e30d52ba3fc 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2445,7 +2445,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, const char *seen_hash, const char *calc_hash, - const struct drbd_peer_request *e) + const struct drbd_peer_request *peer_req) { struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; @@ -2453,7 +2453,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, struct page *page; unsigned len; - if (!e) + if (!peer_req) return; if (!reason || !reason[0]) return; @@ -2472,8 +2472,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, GFP_NOIO); if (!cn_reply) { - dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", - (unsigned long long)e->i.sector, e->i.size); + dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, " + "sector %llu, size %u\n", + (unsigned long long)peer_req->i.sector, + peer_req->i.size); return; } @@ -2483,15 +2485,15 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, tl = tl_add_str(tl, T_dump_ee_reason, reason); tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, 
&e->i.sector); - tl = tl_add_int(tl, T_ee_block_id, &e->block_id); + tl = tl_add_int(tl, T_ee_sector, &peer_req->i.sector); + tl = tl_add_int(tl, T_ee_block_id, &peer_req->block_id); /* dump the first 32k */ - len = min_t(unsigned, e->i.size, 32 << 10); + len = min_t(unsigned, peer_req->i.size, 32 << 10); put_unaligned(T_ee_data, tl++); put_unaligned(len, tl++); - page = e->pages; + page = peer_req->pages; page_chain_for_each(page) { void *d = kmap_atomic(page, KM_USER0); unsigned l = min_t(unsigned, len, PAGE_SIZE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ba94febfab..3a9cd31e094 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -189,7 +189,7 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct list_head *le, *tle; /* The EEs are always appended to the end of the list. Since @@ -198,8 +198,8 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed stop to examine the list... 
*/ list_for_each_safe(le, tle, &mdev->net_ee) { - e = list_entry(le, struct drbd_peer_request, w.list); - if (drbd_ee_has_active_page(e)) + peer_req = list_entry(le, struct drbd_peer_request, w.list); + if (drbd_ee_has_active_page(peer_req)) break; list_move(le, to_be_freed); } @@ -208,14 +208,14 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) { LIST_HEAD(reclaimed); - struct drbd_peer_request *e, *t; + struct drbd_peer_request *peer_req, *t; spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_net_ee(mdev, e); + list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) + drbd_free_net_ee(mdev, peer_req); } /** @@ -313,15 +313,15 @@ struct drbd_peer_request * drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct page *page; unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) return NULL; - e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); - if (!e) { + peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); + if (!peer_req) { if (!(gfp_mask & __GFP_NOWARN)) dev_err(DEV, "alloc_ee: Allocation of an EE failed\n"); return NULL; @@ -331,45 +331,45 @@ drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, if (!page) goto fail; - drbd_clear_interval(&e->i); - e->i.size = data_size; - e->i.sector = sector; - e->i.local = false; - e->i.waiting = false; + drbd_clear_interval(&peer_req->i); + peer_req->i.size = data_size; + peer_req->i.sector = sector; + peer_req->i.local = false; + peer_req->i.waiting = false; - e->epoch = NULL; - e->mdev = mdev; - e->pages = page; - atomic_set(&e->pending_bios, 0); - e->flags = 0; + 
peer_req->epoch = NULL; + peer_req->mdev = mdev; + peer_req->pages = page; + atomic_set(&peer_req->pending_bios, 0); + peer_req->flags = 0; /* * The block_id is opaque to the receiver. It is not endianness * converted, and sent back to the sender unchanged. */ - e->block_id = id; + peer_req->block_id = id; - return e; + return peer_req; fail: - mempool_free(e, drbd_ee_mempool); + mempool_free(peer_req, drbd_ee_mempool); return NULL; } -void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, +void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, int is_net) { - if (e->flags & EE_HAS_DIGEST) - kfree(e->digest); - drbd_pp_free(mdev, e->pages, is_net); - D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(drbd_interval_empty(&e->i)); - mempool_free(e, drbd_ee_mempool); + if (peer_req->flags & EE_HAS_DIGEST) + kfree(peer_req->digest); + drbd_pp_free(mdev, peer_req->pages, is_net); + D_ASSERT(atomic_read(&peer_req->pending_bios) == 0); + D_ASSERT(drbd_interval_empty(&peer_req->i)); + mempool_free(peer_req, drbd_ee_mempool); } int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) { LIST_HEAD(work_list); - struct drbd_peer_request *e, *t; + struct drbd_peer_request *peer_req, *t; int count = 0; int is_net = list == &mdev->net_ee; @@ -377,8 +377,8 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) list_splice_init(list, &work_list); spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &work_list, w.list) { - drbd_free_some_ee(mdev, e, is_net); + list_for_each_entry_safe(peer_req, t, &work_list, w.list) { + drbd_free_some_ee(mdev, peer_req, is_net); count++; } return count; @@ -398,7 +398,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) { LIST_HEAD(work_list); LIST_HEAD(reclaimed); - struct drbd_peer_request *e, *t; + struct drbd_peer_request *peer_req, *t; int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); spin_lock_irq(&mdev->tconn->req_lock); @@ -406,17 
+406,17 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) list_splice_init(&mdev->done_ee, &work_list); spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_net_ee(mdev, e); + list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) + drbd_free_net_ee(mdev, peer_req); /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_discard_ack. * all ignore the last argument. */ - list_for_each_entry_safe(e, t, &work_list, w.list) { + list_for_each_entry_safe(peer_req, t, &work_list, w.list) { /* list_del not necessary, next/prev members not touched */ - ok = e->w.cb(mdev, &e->w, !ok) && ok; - drbd_free_ee(mdev, e); + ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok; + drbd_free_ee(mdev, peer_req); } wake_up(&mdev->ee_wait); @@ -1085,7 +1085,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) /** * drbd_submit_ee() * @mdev: DRBD device. - * @e: peer request + * @peer_req: peer request * @rw: flag field, see bio->bi_rw * * May spread the pages to multiple bios, @@ -1099,14 +1099,14 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) * on certain Xen deployments. */ /* TODO allocate from our own bio_set. 
*/ -int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, +int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, const unsigned rw, const int fault_type) { struct bio *bios = NULL; struct bio *bio; - struct page *page = e->pages; - sector_t sector = e->i.sector; - unsigned ds = e->i.size; + struct page *page = peer_req->pages; + sector_t sector = peer_req->i.sector; + unsigned ds = peer_req->i.size; unsigned n_bios = 0; unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; @@ -1121,11 +1121,11 @@ next_bio: dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); goto fail; } - /* > e->i.sector, unless this is the first bio */ + /* > peer_req->i.sector, unless this is the first bio */ bio->bi_sector = sector; bio->bi_bdev = mdev->ldev->backing_bdev; bio->bi_rw = rw; - bio->bi_private = e; + bio->bi_private = peer_req; bio->bi_end_io = drbd_endio_sec; bio->bi_next = bios; @@ -1155,7 +1155,7 @@ next_bio: D_ASSERT(page == NULL); D_ASSERT(ds == 0); - atomic_set(&e->pending_bios, n_bios); + atomic_set(&peer_req->pending_bios, n_bios); do { bio = bios; bios = bios->bi_next; @@ -1175,9 +1175,9 @@ fail: } static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, - struct drbd_peer_request *e) + struct drbd_peer_request *peer_req) { - struct drbd_interval *i = &e->i; + struct drbd_interval *i = &peer_req->i; drbd_remove_interval(&mdev->write_requests, i); drbd_clear_interval(i); @@ -1266,7 +1266,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) { const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct page *page; int dgs, ds, rr; void *dig_in = mdev->tconn->int_dig_in; @@ -1309,12 +1309,12 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause 
write-out on some other DRBD, * which in turn might block on the other node at this very place. */ - e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); - if (!e) + peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); + if (!peer_req) return NULL; ds = data_size; - page = e->pages; + page = peer_req->pages; page_chain_for_each(page) { unsigned len = min_t(int, ds, PAGE_SIZE); data = kmap(page); @@ -1325,7 +1325,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, } kunmap(page); if (rr != len) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); if (!signal_pending(current)) dev_warn(DEV, "short read receiving data: read %d expected %d\n", rr, len); @@ -1335,18 +1335,18 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, } if (dgs) { - drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, e, dig_vv); + drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); drbd_bcast_ee(mdev, "digest failed", - dgs, dig_in, dig_vv, e); - drbd_free_ee(mdev, e); + dgs, dig_in, dig_vv, peer_req); + drbd_free_ee(mdev, peer_req); return NULL; } } mdev->recv_cnt += data_size>>9; - return e; + return peer_req; } /* drbd_drain_block() just takes a data block @@ -1445,20 +1445,20 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, * drbd_process_done_ee() by asender only */ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_peer_request *e = (struct drbd_peer_request *)w; - sector_t sector = e->i.sector; + struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + sector_t sector = peer_req->i.sector; int ok; - D_ASSERT(drbd_interval_empty(&e->i)); + D_ASSERT(drbd_interval_empty(&peer_req->i)); - if (likely((e->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(mdev, sector, e->i.size); - ok = 
drbd_send_ack(mdev, P_RS_WRITE_ACK, e); + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { + drbd_set_in_sync(mdev, sector, peer_req->i.size); + ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ - drbd_rs_failed_io(mdev, sector, e->i.size); + drbd_rs_failed_io(mdev, sector, peer_req->i.size); - ok = drbd_send_ack(mdev, P_NEG_ACK, e); + ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req); } dec_unacked(mdev); @@ -1467,10 +1467,10 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; - e = read_in_block(mdev, ID_SYNCER, sector, data_size); - if (!e) + peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size); + if (!peer_req) goto fail; dec_rs_pending(mdev); @@ -1479,23 +1479,23 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si /* corresponding dec_unacked() in e_end_resync_block() * respective _drbd_clear_done_ee */ - e->w.cb = e_end_resync_block; + peer_req->w.cb = e_end_resync_block; spin_lock_irq(&mdev->tconn->req_lock); - list_add(&e->w.list, &mdev->sync_ee); + list_add(&peer_req->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(data_size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); - list_del(&e->w.list); + list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); fail: put_ldev(mdev); return false; @@ -1582,21 +1582,21 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, */ static int e_end_block(struct drbd_conf 
*mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = (struct drbd_peer_request *)w; - sector_t sector = e->i.sector; + struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + sector_t sector = peer_req->i.sector; int ok = 1, pcmd; if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) { - if (likely((e->flags & EE_WAS_ERROR) == 0)) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { pcmd = (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T && - e->flags & EE_MAY_SET_IN_SYNC) ? + peer_req->flags & EE_MAY_SET_IN_SYNC) ? P_RS_WRITE_ACK : P_WRITE_ACK; - ok &= drbd_send_ack(mdev, pcmd, e); + ok &= drbd_send_ack(mdev, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(mdev, sector, e->i.size); + drbd_set_in_sync(mdev, sector, peer_req->i.size); } else { - ok = drbd_send_ack(mdev, P_NEG_ACK, e); + ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... * maybe assert this? */ } @@ -1606,28 +1606,28 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->tconn->req_lock); - D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_epoch_entry_interval(mdev, e); + D_ASSERT(!drbd_interval_empty(&peer_req->i)); + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); } else - D_ASSERT(drbd_interval_empty(&e->i)); + D_ASSERT(drbd_interval_empty(&peer_req->i)); - drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); + drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? 
EV_CLEANUP : 0)); return ok; } static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_peer_request *e = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; int ok = 1; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); + ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req); spin_lock_irq(&mdev->tconn->req_lock); - D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_epoch_entry_interval(mdev, e); + D_ASSERT(!drbd_interval_empty(&peer_req->i)); + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); dec_unacked(mdev); @@ -1731,7 +1731,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { sector_t sector; - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct p_data *p = &mdev->tconn->data.rbuf.data; int rw = WRITE; u32 dp_flags; @@ -1753,24 +1753,24 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, * the end of this function. */ sector = be64_to_cpu(p->sector); - e = read_in_block(mdev, p->block_id, sector, data_size); - if (!e) { + peer_req = read_in_block(mdev, p->block_id, sector, data_size); + if (!peer_req) { put_ldev(mdev); return false; } - e->w.cb = e_end_block; + peer_req->w.cb = e_end_block; dp_flags = be32_to_cpu(p->dp_flags); rw |= wire_flags_to_bio(mdev, dp_flags); if (dp_flags & DP_MAY_SET_IN_SYNC) - e->flags |= EE_MAY_SET_IN_SYNC; + peer_req->flags |= EE_MAY_SET_IN_SYNC; spin_lock(&mdev->epoch_lock); - e->epoch = mdev->current_epoch; - atomic_inc(&e->epoch->epoch_size); - atomic_inc(&e->epoch->active); + peer_req->epoch = mdev->current_epoch; + atomic_inc(&peer_req->epoch->epoch_size); + atomic_inc(&peer_req->epoch->active); spin_unlock(&mdev->epoch_lock); /* I'm the receiver, I do hold a net_cnt reference. 
*/ @@ -1779,7 +1779,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } else { /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ - const int size = e->i.size; + const int size = peer_req->i.size; const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); DEFINE_WAIT(wait); int first; @@ -1856,8 +1856,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n", (unsigned long long)sector); inc_unacked(mdev); - e->w.cb = e_send_discard_ack; - list_add_tail(&e->w.list, &mdev->done_ee); + peer_req->w.cb = e_send_discard_ack; + list_add_tail(&peer_req->w.list, &mdev->done_ee); spin_unlock_irq(&mdev->tconn->req_lock); @@ -1894,10 +1894,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } finish_wait(&mdev->misc_wait, &wait); - drbd_insert_interval(&mdev->write_requests, &e->i); + drbd_insert_interval(&mdev->write_requests, &peer_req->i); } - list_add(&e->w.list, &mdev->active_ee); + list_add(&peer_req->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->tconn->req_lock); switch (mdev->tconn->net_conf->wire_protocol) { @@ -1909,7 +1909,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, case DRBD_PROT_B: /* I really don't like it that the receiver thread * sends on the msock, but anyways */ - drbd_send_ack(mdev, P_RECV_ACK, e); + drbd_send_ack(mdev, P_RECV_ACK, peer_req); break; case DRBD_PROT_A: /* nothing to do */ @@ -1918,28 +1918,28 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, if (mdev->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(mdev, e->i.sector, e->i.size); - e->flags |= EE_CALL_AL_COMPLETE_IO; - e->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(mdev, e->i.sector); + drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size); + peer_req->flags |= EE_CALL_AL_COMPLETE_IO; + peer_req->flags &= 
~EE_MAY_SET_IN_SYNC; + drbd_al_begin_io(mdev, peer_req->i.sector); } - if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) + if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); - list_del(&e->w.list); - drbd_remove_epoch_entry_interval(mdev, e); + list_del(&peer_req->w.list); + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); - if (e->flags & EE_CALL_AL_COMPLETE_IO) - drbd_al_complete_io(mdev, e->i.sector); + if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) + drbd_al_complete_io(mdev, peer_req->i.sector); out_interrupted: - drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); + drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP); put_ldev(mdev); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); return false; } @@ -2015,7 +2015,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct digest_info *di = NULL; int size, verb; unsigned int fault_type; @@ -2066,21 +2066,21 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. 
*/ - e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); - if (!e) { + peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); + if (!peer_req) { put_ldev(mdev); return false; } switch (cmd) { case P_DATA_REQUEST: - e->w.cb = w_e_end_data_req; + peer_req->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; /* application IO, don't drbd_rs_begin_io */ goto submit; case P_RS_DATA_REQUEST: - e->w.cb = w_e_end_rsdata_req; + peer_req->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; /* used in the sector offset progress display */ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); @@ -2096,21 +2096,21 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, di->digest_size = digest_size; di->digest = (((char *)di)+sizeof(struct digest_info)); - e->digest = di; - e->flags |= EE_HAS_DIGEST; + peer_req->digest = di; + peer_req->flags |= EE_HAS_DIGEST; if (drbd_recv(mdev, di->digest, digest_size) != digest_size) goto out_free_e; if (cmd == P_CSUM_RS_REQUEST) { D_ASSERT(mdev->tconn->agreed_pro_version >= 89); - e->w.cb = w_e_end_csum_rs_req; + peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); } else if (cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ atomic_add(size >> 9, &mdev->rs_sect_in); - e->w.cb = w_e_end_ov_reply; + peer_req->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); /* drbd_rs_begin_io done when we sent this request, * but accounting still needs to be done. 
*/ @@ -2134,7 +2134,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, dev_info(DEV, "Online Verify start sector: %llu\n", (unsigned long long)sector); } - e->w.cb = w_e_end_ov_req; + peer_req->w.cb = w_e_end_ov_req; fault_type = DRBD_FAULT_RS_RD; break; @@ -2178,22 +2178,22 @@ submit_for_resync: submit: inc_unacked(mdev); spin_lock_irq(&mdev->tconn->req_lock); - list_add_tail(&e->w.list, &mdev->read_ee); + list_add_tail(&peer_req->w.list, &mdev->read_ee); spin_unlock_irq(&mdev->tconn->req_lock); - if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) + if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); - list_del(&e->w.list); + list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); /* no drbd_rs_complete_io(), we are dropping the connection anyways */ out_free_e: put_ldev(mdev); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); return false; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 06628d1504b..f13d56c2bf0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -80,47 +80,47 @@ void drbd_md_io_complete(struct bio *bio, int error) /* reads on behalf of the partner, * "submitted" by the receiver */ -void drbd_endio_read_sec_final(struct drbd_peer_request *e) __releases(local) +void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = e->mdev; + struct drbd_conf *mdev = peer_req->mdev; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - mdev->read_cnt += e->i.size >> 9; - list_del(&e->w.list); + mdev->read_cnt += peer_req->i.size >> 9; + list_del(&peer_req->w.list); if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); - if (test_bit(__EE_WAS_ERROR, &e->flags)) + if (test_bit(__EE_WAS_ERROR, 
&peer_req->flags)) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - drbd_queue_work(&mdev->tconn->data.work, &e->w); + drbd_queue_work(&mdev->tconn->data.work, &peer_req->w); put_ldev(mdev); } /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver, final stage. */ -static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(local) +static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = e->mdev; + struct drbd_conf *mdev = peer_req->mdev; sector_t e_sector; int do_wake; u64 block_id; int do_al_complete_io; - /* after we moved e to done_ee, + /* after we moved peer_req to done_ee, * we may no longer access it, * it may be freed/reused already! * (as soon as we release the req_lock) */ - e_sector = e->i.sector; - do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; - block_id = e->block_id; + e_sector = peer_req->i.sector; + do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; + block_id = peer_req->block_id; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - mdev->writ_cnt += e->i.size >> 9; - list_del(&e->w.list); /* has been on active_ee or sync_ee */ - list_add_tail(&e->w.list, &mdev->done_ee); + mdev->writ_cnt += peer_req->i.size >> 9; + list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */ + list_add_tail(&peer_req->w.list, &mdev->done_ee); /* * Do not remove from the write_requests tree here: we did not send the @@ -132,7 +132,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(l do_wake = list_empty(block_id == ID_SYNCER ? 
&mdev->sync_ee : &mdev->active_ee); - if (test_bit(__EE_WAS_ERROR, &e->flags)) + if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); @@ -154,20 +154,20 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(l */ void drbd_endio_sec(struct bio *bio, int error) { - struct drbd_peer_request *e = bio->bi_private; - struct drbd_conf *mdev = e->mdev; + struct drbd_peer_request *peer_req = bio->bi_private; + struct drbd_conf *mdev = peer_req->mdev; int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; if (error && __ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: error=%d s=%llus\n", is_write ? "write" : "read", error, - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); if (!error && !uptodate) { if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", is_write ? "write" : "read", - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); /* strange behavior of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! 
*/ @@ -175,14 +175,14 @@ void drbd_endio_sec(struct bio *bio, int error) } if (error) - set_bit(__EE_WAS_ERROR, &e->flags); + set_bit(__EE_WAS_ERROR, &peer_req->flags); bio_put(bio); /* no need for the bio anymore */ - if (atomic_dec_and_test(&e->pending_bios)) { + if (atomic_dec_and_test(&peer_req->pending_bios)) { if (is_write) - drbd_endio_write_sec_final(e); + drbd_endio_write_sec_final(peer_req); else - drbd_endio_read_sec_final(e); + drbd_endio_read_sec_final(peer_req); } } @@ -248,11 +248,11 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, - struct drbd_peer_request *e, void *digest) + struct drbd_peer_request *peer_req, void *digest) { struct hash_desc desc; struct scatterlist sg; - struct page *page = e->pages; + struct page *page = peer_req->pages; struct page *tmp; unsigned len; @@ -269,7 +269,7 @@ void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, page = tmp; } /* and now the last, possibly only partially used page */ - len = e->i.size & (PAGE_SIZE - 1); + len = peer_req->i.size & (PAGE_SIZE - 1); sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); crypto_hash_update(&desc, &sg, sg.length); crypto_hash_final(&desc, digest); @@ -298,7 +298,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * /* TODO merge common code with w_e_end_ov_req */ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); int digest_size; void *digest; int ok = 1; @@ -306,22 +307,22 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - if (likely((e->flags & EE_WAS_ERROR) != 0)) + if (likely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; digest_size = crypto_hash_digestsize(mdev->csums_tfm); 
digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - sector_t sector = e->i.sector; - unsigned int size = e->i.size; - drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; + drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); /* Free e and pages before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. */ - drbd_free_ee(mdev, e); - e = NULL; + drbd_free_ee(mdev, peer_req); + peer_req = NULL; inc_rs_pending(mdev); ok = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, @@ -333,8 +334,8 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } out: - if (e) - drbd_free_ee(mdev, e); + if (peer_req) + drbd_free_ee(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); @@ -345,7 +346,7 @@ out: static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; if (!get_ldev(mdev)) return -EIO; @@ -355,17 +356,17 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. 
*/ - e = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); - if (!e) + peer_req = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); + if (!peer_req) goto defer; - e->w.cb = w_e_send_csum; + peer_req->w.cb = w_e_send_csum; spin_lock_irq(&mdev->tconn->req_lock); - list_add(&e->w.list, &mdev->read_ee); + list_add(&peer_req->w.list, &mdev->read_ee); spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) + if (drbd_submit_ee(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) return 0; /* If it failed because of ENOMEM, retry should help. If it failed @@ -373,10 +374,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) * retry may or may not help. * If it does not, you may need to force disconnect. */ spin_lock_irq(&mdev->tconn->req_lock); - list_del(&e->w.list); + list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); defer: put_ldev(mdev); return -EAGAIN; @@ -901,19 +902,19 @@ out: } /* helper */ -static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *e) +static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) { - if (drbd_ee_has_active_page(e)) { + if (drbd_ee_has_active_page(peer_req)) { /* This might happen if sendpage() has not finished */ - int i = (e->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; + int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; atomic_add(i, &mdev->pp_in_use_by_net); atomic_sub(i, &mdev->pp_in_use); spin_lock_irq(&mdev->tconn->req_lock); - list_add_tail(&e->w.list, &mdev->net_ee); + list_add_tail(&peer_req->w.list, &mdev->net_ee); spin_unlock_irq(&mdev->tconn->req_lock); wake_up(&drbd_pp_wait); } else - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); } /** @@ -924,28 +925,28 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct 
drbd_peer_requ */ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } - if (likely((e->flags & EE_WAS_ERROR) == 0)) { - ok = drbd_send_block(mdev, P_DATA_REPLY, e); + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { + ok = drbd_send_block(mdev, P_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. sector=%llus.\n", - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); + ok = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_block() failed\n"); @@ -960,26 +961,26 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) */ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_rs_complete_io(mdev, e->i.sector); + drbd_rs_complete_io(mdev, peer_req->i.sector); put_ldev(mdev); } if (mdev->state.conn == C_AHEAD) { - ok = drbd_send_ack(mdev, P_RS_CANCEL, e); - } else if (likely((e->flags & EE_WAS_ERROR) == 0)) { + ok = drbd_send_ack(mdev, P_RS_CANCEL, peer_req); + } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(mdev); - ok = drbd_send_block(mdev, 
P_RS_DATA_REPLY, e); + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Not sending RSDataReply, " @@ -989,17 +990,17 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegRSDReply. sector %llus.\n", - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ - drbd_rs_failed_io(mdev, e->i.sector, e->i.size); + drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_block() failed\n"); @@ -1008,26 +1009,26 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct digest_info *di; int digest_size; void *digest = NULL; int ok, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, e->i.sector); + drbd_rs_complete_io(mdev, peer_req->i.sector); put_ldev(mdev); } - di = e->digest; + di = peer_req->digest; - if (likely((e->flags & EE_WAS_ERROR) == 0)) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { /* quick hack to try to avoid a race against reconfiguration. 
* a real fix would be much more involved, * introducing more locking mechanisms */ @@ -1037,31 +1038,31 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } if (eq) { - drbd_set_in_sync(mdev, e->i.sector, e->i.size); + drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ - mdev->rs_same_csum += e->i.size >> BM_BLOCK_SHIFT; - ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); + mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; + ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); } else { inc_rs_pending(mdev); - e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ - e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */ + peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ + peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); - ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } } else { - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. 
I guess it gets messy.\n"); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_block/ack() failed\n"); @@ -1071,9 +1072,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* TODO merge common code with w_e_send_csum */ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); - sector_t sector = e->i.sector; - unsigned int size = e->i.size; + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; int digest_size; void *digest; int ok = 1; @@ -1088,8 +1089,8 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) goto out; } - if (likely(!(e->flags & EE_WAS_ERROR))) - drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); + if (likely(!(peer_req->flags & EE_WAS_ERROR))) + drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); else memset(digest, 0, digest_size); @@ -1098,8 +1099,8 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. 
*/ - drbd_free_ee(mdev, e); - e = NULL; + drbd_free_ee(mdev, peer_req); + peer_req = NULL; inc_rs_pending(mdev); ok = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, @@ -1109,8 +1110,8 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) kfree(digest); out: - if (e) - drbd_free_ee(mdev, e); + if (peer_req) + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return ok; } @@ -1128,16 +1129,16 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct digest_info *di; void *digest; - sector_t sector = e->i.sector; - unsigned int size = e->i.size; + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; int digest_size; int ok, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } @@ -1145,17 +1146,17 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all * the resync lru has been cleaned up already */ if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, e->i.sector); + drbd_rs_complete_io(mdev, peer_req->i.sector); put_ldev(mdev); } - di = e->digest; + di = peer_req->digest; - if (likely((e->flags & EE_WAS_ERROR) == 0)) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { digest_size = crypto_hash_digestsize(mdev->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); + drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); D_ASSERT(digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); @@ -1168,7 +1169,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * some 
distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. */ - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); if (!eq) drbd_ov_oos_found(mdev, sector, size); else From b8907339534b8d17f6aad9e9cc98d490aa0c6137 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 Jan 2011 14:07:51 +0100 Subject: [PATCH 061/609] drbd: Moved the state functions into its own source file Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Makefile | 2 +- drivers/block/drbd/drbd_int.h | 46 +- drivers/block/drbd/drbd_main.c | 1179 +----------------------------- drivers/block/drbd/drbd_state.c | 1217 +++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_state.h | 101 +++ 5 files changed, 1326 insertions(+), 1219 deletions(-) create mode 100644 drivers/block/drbd/drbd_state.c create mode 100644 drivers/block/drbd/drbd_state.h diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index cacbb04f285..06fb4453734 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,6 +1,6 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o -drbd-y += drbd_interval.o +drbd-y += drbd_interval.o drbd_state.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 302ccc6d943..98addab2c92 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -43,6 +43,8 @@ #include #include #include +#include +#include "drbd_state.h" #ifdef __CHECKER__ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) @@ -1120,35 +1122,12 @@ static inline void drbd_put_data_sock(struct drbd_conf *mdev) /* drbd_main.c */ -enum chg_state_flags { - CS_HARD = 1, - CS_VERBOSE = 2, - CS_WAIT_COMPLETE = 4, - CS_SERIALIZE = 8, - CS_ORDERED = CS_WAIT_COMPLETE + 
CS_SERIALIZE, -}; - enum dds_flags { DDSF_FORCED = 1, DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ }; extern void drbd_init_set_defaults(struct drbd_conf *mdev); -extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, - enum chg_state_flags f, - union drbd_state mask, - union drbd_state val); -extern void drbd_force_state(struct drbd_conf *, union drbd_state, - union drbd_state); -extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, - union drbd_state, - union drbd_state, - enum chg_state_flags); -extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, - enum chg_state_flags, - struct completion *done); -extern void print_st_err(struct drbd_conf *, union drbd_state, - union drbd_state, int); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); #ifdef CONFIG_SMP @@ -1712,6 +1691,10 @@ static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) } + + + + static inline void drbd_state_lock(struct drbd_conf *mdev) { wait_event(mdev->misc_wait, @@ -1737,23 +1720,6 @@ _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } -/** - * drbd_request_state() - Reqest a state change - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * - * This is the most graceful way of requesting a state change. It is verbose - * quite verbose in case the state change is not possible, and all those - * state changes are globally serialized. 
- */ -static inline int drbd_request_state(struct drbd_conf *mdev, - union drbd_state mask, - union drbd_state val) -{ - return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); -} - #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7728d161340..4b39b3d0dd5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -56,14 +56,6 @@ #include "drbd_vli.h" -struct after_state_chg_work { - struct drbd_work w; - union drbd_state os; - union drbd_state ns; - enum chg_state_flags flags; - struct completion *done; -}; - static DEFINE_MUTEX(drbd_main_mutex); int drbdd_init(struct drbd_thread *); int drbd_worker(struct drbd_thread *); @@ -72,9 +64,6 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); -static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags); static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); @@ -340,7 +329,7 @@ bail: * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, * RESTART_FROZEN_DISK_IO. 
*/ -static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) +void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) { struct drbd_tl_epoch *b, *tmp, **pn; struct list_head *le, *tle, carry_reads; @@ -450,1172 +439,6 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) spin_unlock_irq(&mdev->tconn->req_lock); } -/** - * cl_wide_st_chg() - true if the state change is a cluster wide one - * @mdev: DRBD device. - * @os: old (current) state. - * @ns: new (wanted) state. - */ -static int cl_wide_st_chg(struct drbd_conf *mdev, - union drbd_state os, union drbd_state ns) -{ - return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && - ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || - (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || - (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || - (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || - (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || - (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); -} - -enum drbd_state_rv -drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state mask, union drbd_state val) -{ - unsigned long flags; - union drbd_state os, ns; - enum drbd_state_rv rv; - - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - rv = _drbd_set_state(mdev, ns, f, NULL); - ns = mdev->state; - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - return rv; -} - -/** - * drbd_force_state() - Impose a change which happens outside our control on our state - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. 
- */ -void drbd_force_state(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) -{ - drbd_change_state(mdev, CS_HARD, mask, val); -} - -static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, - union drbd_state, - union drbd_state); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); -int drbd_send_state_req(struct drbd_conf *, - union drbd_state, union drbd_state); - -static enum drbd_state_rv -_req_st_cond(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val) -{ - union drbd_state os, ns; - unsigned long flags; - enum drbd_state_rv rv; - - if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) - return SS_CW_SUCCESS; - - if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) - return SS_CW_FAILED_BY_PEER; - - rv = 0; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - - if (!cl_wide_st_chg(mdev, os, ns)) - rv = SS_CW_NO_NEED; - if (!rv) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) { - rv = is_valid_state_transition(mdev, ns, os); - if (rv == SS_SUCCESS) - rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ - } - } - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - return rv; -} - -/** - * drbd_req_state() - Perform an eventually cluster wide state change - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * @f: flags - * - * Should not be called directly, use drbd_request_state() or - * _drbd_request_state(). 
- */ -static enum drbd_state_rv -drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) -{ - struct completion done; - unsigned long flags; - union drbd_state os, ns; - enum drbd_state_rv rv; - - init_completion(&done); - - if (f & CS_SERIALIZE) - mutex_lock(&mdev->state_mutex); - - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - - if (cl_wide_st_chg(mdev, os, ns)) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) - rv = is_valid_state_transition(mdev, ns, os); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - if (rv < SS_SUCCESS) { - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - - drbd_state_lock(mdev); - if (!drbd_send_state_req(mdev, mask, val)) { - drbd_state_unlock(mdev); - rv = SS_CW_FAILED_BY_PEER; - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - - wait_event(mdev->state_wait, - (rv = _req_st_cond(mdev, mask, val))); - - if (rv < SS_SUCCESS) { - drbd_state_unlock(mdev); - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - rv = _drbd_set_state(mdev, ns, f, &done); - drbd_state_unlock(mdev); - } else { - rv = _drbd_set_state(mdev, ns, f, &done); - } - - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->tconn->worker.task); - wait_for_completion(&done); - } - -abort: - if (f & CS_SERIALIZE) - mutex_unlock(&mdev->state_mutex); - - return rv; -} - -/** - * _drbd_request_state() - Request a state change (with flags) - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. 
- * @f: flags - * - * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE - * flag, or when logging of failed state change requests is not desired. - */ -enum drbd_state_rv -_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) -{ - enum drbd_state_rv rv; - - wait_event(mdev->state_wait, - (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); - - return rv; -} - -static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) -{ - dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", - name, - drbd_conn_str(ns.conn), - drbd_role_str(ns.role), - drbd_role_str(ns.peer), - drbd_disk_str(ns.disk), - drbd_disk_str(ns.pdsk), - is_susp(ns) ? 's' : 'r', - ns.aftr_isp ? 'a' : '-', - ns.peer_isp ? 'p' : '-', - ns.user_isp ? 'u' : '-' - ); -} - -void print_st_err(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum drbd_state_rv err) -{ - if (err == SS_IN_TRANSIENT_STATE) - return; - dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); - print_st(mdev, " state", os); - print_st(mdev, "wanted", ns); -} - - -/** - * is_valid_state() - Returns an SS_ error code if ns is not valid - * @mdev: DRBD device. - * @ns: State to consider. 
- */ -static enum drbd_state_rv -is_valid_state(struct drbd_conf *mdev, union drbd_state ns) -{ - /* See drbd_state_sw_errors in drbd_strings.c */ - - enum drbd_fencing_p fp; - enum drbd_state_rv rv = SS_SUCCESS; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - if (get_net_conf(mdev->tconn)) { - if (!mdev->tconn->net_conf->two_primaries && - ns.role == R_PRIMARY && ns.peer == R_PRIMARY) - rv = SS_TWO_PRIMARIES; - put_net_conf(mdev->tconn); - } - - if (rv <= 0) - /* already found a reason to abort */; - else if (ns.role == R_SECONDARY && mdev->open_cnt) - rv = SS_DEVICE_IN_USE; - - else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (fp >= FP_RESOURCE && - ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) - rv = SS_PRIMARY_NOP; - - else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) - rv = SS_NO_LOCAL_DISK; - - else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) - rv = SS_NO_REMOTE_DISK; - - else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) - rv = SS_NO_UP_TO_DATE_DISK; - - else if ((ns.conn == C_CONNECTED || - ns.conn == C_WF_BITMAP_S || - ns.conn == C_SYNC_SOURCE || - ns.conn == C_PAUSED_SYNC_S) && - ns.disk == D_OUTDATED) - rv = SS_CONNECTED_OUTDATES; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) - rv = SS_NO_VERIFY_ALG; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - mdev->tconn->agreed_pro_version < 88) - rv = SS_NOT_SUPPORTED; - - else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) - rv = SS_CONNECTED_OUTDATES; - - return rv; -} - -/** - * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible - * @mdev: DRBD device. 
- * @ns: new state. - * @os: old state. - */ -static enum drbd_state_rv -is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, - union drbd_state os) -{ - enum drbd_state_rv rv = SS_SUCCESS; - - if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && - os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) - rv = SS_ALREADY_STANDALONE; - - if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) - rv = SS_IS_DISKLESS; - - if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) - rv = SS_NO_NET_CONFIG; - - if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) - rv = SS_LOWER_THAN_OUTDATED; - - if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) - rv = SS_IN_TRANSIENT_STATE; - - if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) - rv = SS_IN_TRANSIENT_STATE; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - ns.conn != os.conn && os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && - os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) - && os.conn < C_WF_REPORT_PARAMS) - rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ - - return rv; -} - -/** - * sanitize_state() - Resolves implicitly necessary additional changes to a state transition - * @mdev: DRBD device. - * @os: old state. - * @ns: new state. - * @warn_sync_abort: - * - * When we loose connection, we have to set the state of the peers disk (pdsk) - * to D_UNKNOWN. This rule and many more along those lines are in this function. 
- */ -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort) -{ - enum drbd_fencing_p fp; - enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - ns.conn = os.conn; - - /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. - * If you try to go into some Sync* state, that shall fail (elsewhere). */ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) - ns.conn = os.conn; - - /* we cannot fail (again) if we already detached */ - if (ns.disk == D_FAILED && os.disk == D_DISKLESS) - ns.disk = D_DISKLESS; - - /* if we are only D_ATTACHING yet, - * we can (and should) go directly to D_DISKLESS. */ - if (ns.disk == D_FAILED && os.disk == D_ATTACHING) - ns.disk = D_DISKLESS; - - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - ns.conn = os.conn; - - if (ns.conn < C_CONNECTED) { - ns.peer_isp = 0; - ns.peer = R_UNKNOWN; - if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) - ns.pdsk = D_UNKNOWN; - } - - /* Clear the aftr_isp when becoming unconfigured */ - if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) - ns.aftr_isp = 0; - - /* Abort resync if a disk fails/detaches */ - if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && - (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = - os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? 
- "Online-verify" : "Resync"; - ns.conn = C_CONNECTED; - } - - /* Connection breaks down before we finished "Negotiating" */ - if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && - get_ldev_if_state(mdev, D_NEGOTIATING)) { - if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { - ns.disk = mdev->new_state_tmp.disk; - ns.pdsk = mdev->new_state_tmp.pdsk; - } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); - ns.disk = D_DISKLESS; - ns.pdsk = D_UNKNOWN; - } - put_ldev(mdev); - } - - /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ - if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { - if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) - ns.disk = D_UP_TO_DATE; - if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) - ns.pdsk = D_UP_TO_DATE; - } - - /* Implications of the connection stat on the disk states */ - disk_min = D_DISKLESS; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_UNKNOWN; - switch ((enum drbd_conns)ns.conn) { - case C_WF_BITMAP_T: - case C_PAUSED_SYNC_T: - case C_STARTING_SYNC_T: - case C_WF_SYNC_UUID: - case C_BEHIND: - disk_min = D_INCONSISTENT; - disk_max = D_OUTDATED; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_VERIFY_S: - case C_VERIFY_T: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_CONNECTED: - disk_min = D_DISKLESS; - disk_max = D_UP_TO_DATE; - pdsk_min = D_DISKLESS; - pdsk_max = D_UP_TO_DATE; - break; - case C_WF_BITMAP_S: - case C_PAUSED_SYNC_S: - case C_STARTING_SYNC_S: - case C_AHEAD: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. 
But explicit outdate necessary*/ - break; - case C_SYNC_TARGET: - disk_min = D_INCONSISTENT; - disk_max = D_INCONSISTENT; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_SYNC_SOURCE: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_INCONSISTENT; - break; - case C_STANDALONE: - case C_DISCONNECTING: - case C_UNCONNECTED: - case C_TIMEOUT: - case C_BROKEN_PIPE: - case C_NETWORK_FAILURE: - case C_PROTOCOL_ERROR: - case C_TEAR_DOWN: - case C_WF_CONNECTION: - case C_WF_REPORT_PARAMS: - case C_MASK: - break; - } - if (ns.disk > disk_max) - ns.disk = disk_max; - - if (ns.disk < disk_min) { - dev_warn(DEV, "Implicitly set disk from %s to %s\n", - drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); - ns.disk = disk_min; - } - if (ns.pdsk > pdsk_max) - ns.pdsk = pdsk_max; - - if (ns.pdsk < pdsk_min) { - dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", - drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); - ns.pdsk = pdsk_min; - } - - if (fp == FP_STONITH && - (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && - !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) - ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - - if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && - !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) - ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ - - if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { - if (ns.conn == C_SYNC_SOURCE) - ns.conn = C_PAUSED_SYNC_S; - if (ns.conn == C_SYNC_TARGET) - ns.conn = C_PAUSED_SYNC_T; - } else { - if (ns.conn == C_PAUSED_SYNC_S) - ns.conn = C_SYNC_SOURCE; - if (ns.conn == C_PAUSED_SYNC_T) - ns.conn = C_SYNC_TARGET; - } - - return ns; -} - -/* helper for __drbd_set_state */ -static void set_ov_position(struct drbd_conf *mdev, enum 
drbd_conns cs) -{ - if (mdev->tconn->agreed_pro_version < 90) - mdev->ov_start_sector = 0; - mdev->rs_total = drbd_bm_bits(mdev); - mdev->ov_position = 0; - if (cs == C_VERIFY_T) { - /* starting online verify from an arbitrary position - * does not fit well into the existing protocol. - * on C_VERIFY_T, we initialize ov_left and friends - * implicitly in receive_DataRequest once the - * first P_OV_REQUEST is received */ - mdev->ov_start_sector = ~(sector_t)0; - } else { - unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); - if (bit >= mdev->rs_total) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(mdev->rs_total - 1); - mdev->rs_total = 1; - } else - mdev->rs_total -= bit; - mdev->ov_position = mdev->ov_start_sector; - } - mdev->ov_left = mdev->rs_total; -} - -static void drbd_resume_al(struct drbd_conf *mdev) -{ - if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) - dev_info(DEV, "Resumed AL updates\n"); -} - -/** - * __drbd_set_state() - Set a new DRBD state - * @mdev: DRBD device. - * @ns: new state. - * @flags: Flags - * @done: Optional completion, that will get completed after the after_state_ch() finished - * - * Caller needs to hold req_lock, and global_state_lock. Do not call directly. 
- */ -enum drbd_state_rv -__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, - enum chg_state_flags flags, struct completion *done) -{ - union drbd_state os; - enum drbd_state_rv rv = SS_SUCCESS; - const char *warn_sync_abort = NULL; - struct after_state_chg_work *ascw; - - os = mdev->state; - - ns = sanitize_state(mdev, os, ns, &warn_sync_abort); - - if (ns.i == os.i) - return SS_NOTHING_TO_DO; - - if (!(flags & CS_HARD)) { - /* pre-state-change checks ; only look at ns */ - /* See drbd_state_sw_errors in drbd_strings.c */ - - rv = is_valid_state(mdev, ns); - if (rv < SS_SUCCESS) { - /* If the old state was illegal as well, then let - this happen...*/ - - if (is_valid_state(mdev, os) == rv) - rv = is_valid_state_transition(mdev, ns, os); - } else - rv = is_valid_state_transition(mdev, ns, os); - } - - if (rv < SS_SUCCESS) { - if (flags & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - return rv; - } - - if (warn_sync_abort) - dev_warn(DEV, "%s aborted.\n", warn_sync_abort); - - { - char *pbp, pb[300]; - pbp = pb; - *pbp = 0; - if (ns.role != os.role) - pbp += sprintf(pbp, "role( %s -> %s ) ", - drbd_role_str(os.role), - drbd_role_str(ns.role)); - if (ns.peer != os.peer) - pbp += sprintf(pbp, "peer( %s -> %s ) ", - drbd_role_str(os.peer), - drbd_role_str(ns.peer)); - if (ns.conn != os.conn) - pbp += sprintf(pbp, "conn( %s -> %s ) ", - drbd_conn_str(os.conn), - drbd_conn_str(ns.conn)); - if (ns.disk != os.disk) - pbp += sprintf(pbp, "disk( %s -> %s ) ", - drbd_disk_str(os.disk), - drbd_disk_str(ns.disk)); - if (ns.pdsk != os.pdsk) - pbp += sprintf(pbp, "pdsk( %s -> %s ) ", - drbd_disk_str(os.pdsk), - drbd_disk_str(ns.pdsk)); - if (is_susp(ns) != is_susp(os)) - pbp += sprintf(pbp, "susp( %d -> %d ) ", - is_susp(os), - is_susp(ns)); - if (ns.aftr_isp != os.aftr_isp) - pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", - os.aftr_isp, - ns.aftr_isp); - if (ns.peer_isp != os.peer_isp) - pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", - os.peer_isp, - ns.peer_isp); - if 
(ns.user_isp != os.user_isp) - pbp += sprintf(pbp, "user_isp( %d -> %d ) ", - os.user_isp, - ns.user_isp); - dev_info(DEV, "%s\n", pb); - } - - /* solve the race between becoming unconfigured, - * worker doing the cleanup, and - * admin reconfiguring us: - * on (re)configure, first set CONFIG_PENDING, - * then wait for a potentially exiting worker, - * start the worker, and schedule one no_op. - * then proceed with configuration. - */ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY && - !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) - set_bit(DEVICE_DYING, &mdev->flags); - - /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference - * on the ldev here, to be sure the transition -> D_DISKLESS resp. - * drbd_ldev_destroy() won't happen before our corresponding - * after_state_ch works run, where we put_ldev again. */ - if ((os.disk != D_FAILED && ns.disk == D_FAILED) || - (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) - atomic_inc(&mdev->local_cnt); - - mdev->state = ns; - - if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) - drbd_print_uuids(mdev, "attached to UUIDs"); - - wake_up(&mdev->misc_wait); - wake_up(&mdev->state_wait); - - /* aborted verify run. 
log the last position */ - if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); - } - - if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && - (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { - dev_info(DEV, "Syncer continues.\n"); - mdev->rs_paused += (long)jiffies - -(long)mdev->rs_mark_time[mdev->rs_last_mark]; - if (ns.conn == C_SYNC_TARGET) - mod_timer(&mdev->resync_timer, jiffies); - } - - if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && - (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { - dev_info(DEV, "Resync suspended\n"); - mdev->rs_mark_time[mdev->rs_last_mark] = jiffies; - } - - if (os.conn == C_CONNECTED && - (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { - unsigned long now = jiffies; - int i; - - set_ov_position(mdev, ns.conn); - mdev->rs_start = now; - mdev->rs_last_events = 0; - mdev->rs_last_sect_ev = 0; - mdev->ov_last_oos_size = 0; - mdev->ov_last_oos_start = 0; - - for (i = 0; i < DRBD_SYNC_MARKS; i++) { - mdev->rs_mark_left[i] = mdev->ov_left; - mdev->rs_mark_time[i] = now; - } - - drbd_rs_controller_reset(mdev); - - if (ns.conn == C_VERIFY_S) { - dev_info(DEV, "Starting Online Verify from sector %llu\n", - (unsigned long long)mdev->ov_position); - mod_timer(&mdev->resync_timer, jiffies); - } - } - - if (get_ldev(mdev)) { - u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| - MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| - MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); - - if (test_bit(CRASHED_PRIMARY, &mdev->flags)) - mdf |= MDF_CRASHED_PRIMARY; - if (mdev->state.role == R_PRIMARY || - (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) - mdf |= MDF_PRIMARY_IND; - if (mdev->state.conn > C_WF_REPORT_PARAMS) - mdf |= MDF_CONNECTED_IND; - if (mdev->state.disk > 
D_INCONSISTENT) - mdf |= MDF_CONSISTENT; - if (mdev->state.disk > D_OUTDATED) - mdf |= MDF_WAS_UP_TO_DATE; - if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) - mdf |= MDF_PEER_OUT_DATED; - if (mdf != mdev->ldev->md.flags) { - mdev->ldev->md.flags = mdf; - drbd_md_mark_dirty(mdev); - } - if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) - drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); - put_ldev(mdev); - } - - /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ - if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && - os.peer == R_SECONDARY && ns.peer == R_PRIMARY) - set_bit(CONSIDER_RESYNC, &mdev->flags); - - /* Receiver should clean up itself */ - if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&mdev->tconn->receiver); - - /* Now the receiver finished cleaning up itself, it should die */ - if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&mdev->tconn->receiver); - - /* Upon network failure, we need to restart the receiver. 
*/ - if (os.conn > C_TEAR_DOWN && - ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&mdev->tconn->receiver); - - /* Resume AL writing if we get a connection */ - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) - drbd_resume_al(mdev); - - ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); - if (ascw) { - ascw->os = os; - ascw->ns = ns; - ascw->flags = flags; - ascw->w.cb = w_after_state_ch; - ascw->done = done; - drbd_queue_work(&mdev->tconn->data.work, &ascw->w); - } else { - dev_warn(DEV, "Could not kmalloc an ascw\n"); - } - - return rv; -} - -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) -{ - struct after_state_chg_work *ascw = - container_of(w, struct after_state_chg_work, w); - after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); - if (ascw->flags & CS_WAIT_COMPLETE) { - D_ASSERT(ascw->done != NULL); - complete(ascw->done); - } - kfree(ascw); - - return 1; -} - -static void abw_start_sync(struct drbd_conf *mdev, int rv) -{ - if (rv) { - dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); - _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); - return; - } - - switch (mdev->state.conn) { - case C_STARTING_SYNC_T: - _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); - break; - case C_STARTING_SYNC_S: - drbd_start_resync(mdev, C_SYNC_SOURCE); - break; - } -} - -int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, - int (*io_fn)(struct drbd_conf *), - char *why, enum bm_flag flags) -{ - int rv; - - D_ASSERT(current == mdev->tconn->worker.task); - - /* open coded non-blocking drbd_suspend_io(mdev); */ - set_bit(SUSPEND_IO, &mdev->flags); - - drbd_bm_lock(mdev, why, flags); - rv = io_fn(mdev); - drbd_bm_unlock(mdev); - - drbd_resume_io(mdev); - - return rv; -} - -/** - * after_state_ch() - Perform after state change actions that may sleep - * @mdev: DRBD device. - * @os: old state. - * @ns: new state. 
- * @flags: Flags - */ -static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags) -{ - enum drbd_fencing_p fp; - enum drbd_req_event what = NOTHING; - union drbd_state nsm = (union drbd_state){ .i = -1 }; - - if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { - clear_bit(CRASHED_PRIMARY, &mdev->flags); - if (mdev->p_uuid) - mdev->p_uuid[UI_FLAGS] &= ~((u64)2); - } - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - /* Inform userspace about the change... */ - drbd_bcast_state(mdev, ns); - - if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) - drbd_khelper(mdev, "pri-on-incon-degr"); - - /* Here we have the actions that are performed after a - state change. This function might sleep */ - - nsm.i = -1; - if (ns.susp_nod) { - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) - what = RESEND; - - if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) - what = RESTART_FROZEN_DISK_IO; - - if (what != NOTHING) - nsm.susp_nod = 0; - } - - if (ns.susp_fen) { - /* case1: The outdate peer handler is successful: */ - if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - tl_clear(mdev); - if (test_bit(NEW_CUR_UUID, &mdev->flags)) { - drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); - } - spin_lock_irq(&mdev->tconn->req_lock); - _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); - } - /* case2: The connection was established again: */ - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { - clear_bit(NEW_CUR_UUID, &mdev->flags); - what = RESEND; - nsm.susp_fen = 0; - } - } - - if (what != NOTHING) { - spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev, what); - nsm.i &= mdev->state.i; - _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); - 
spin_unlock_irq(&mdev->tconn->req_lock); - } - - /* Became sync source. With protocol >= 96, we still need to send out - * the sync uuid now. Need to do that before any drbd_send_state, or - * the other side may go "paused sync" before receiving the sync uuids, - * which is unexpected. */ - if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && - (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && - mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { - drbd_gen_and_send_sync_uuid(mdev); - put_ldev(mdev); - } - - /* Do not change the order of the if above and the two below... */ - if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ - drbd_send_uuids(mdev); - drbd_send_state(mdev); - } - /* No point in queuing send_bitmap if we don't have a connection - * anymore, so check also the _current_ state, not only the new state - * at the time this work was queued. */ - if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && - mdev->state.conn == C_WF_BITMAP_S) - drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, - "send_bitmap (WFBitMapS)", - BM_LOCKED_TEST_ALLOWED); - - /* Lost contact to peer's copy of the data */ - if ((os.pdsk >= D_INCONSISTENT && - os.pdsk != D_UNKNOWN && - os.pdsk != D_OUTDATED) - && (ns.pdsk < D_INCONSISTENT || - ns.pdsk == D_UNKNOWN || - ns.pdsk == D_OUTDATED)) { - if (get_ldev(mdev)) { - if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - if (is_susp(mdev->state)) { - set_bit(NEW_CUR_UUID, &mdev->flags); - } else { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); - } - } - put_ldev(mdev); - } - } - - if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); - } - - /* D_DISKLESS Peer becomes secondary */ - if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) - /* We may still be Primary ourselves. 
- * No harm done if the bitmap still changes, - * redirtied pages will follow later. */ - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, - "demote diskless peer", BM_LOCKED_SET_ALLOWED); - put_ldev(mdev); - } - - /* Write out all changed bits on demote. - * Though, no need to da that just yet - * if there is a resync going on still */ - if (os.role == R_PRIMARY && ns.role == R_SECONDARY && - mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { - /* No changes to the bitmap expected this time, so assert that, - * even though no harm was done if it did change. */ - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, - "demote", BM_LOCKED_TEST_ALLOWED); - put_ldev(mdev); - } - - /* Last part of the attaching process ... */ - if (ns.conn >= C_CONNECTED && - os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - drbd_send_sizes(mdev, 0, 0); /* to start sync... */ - drbd_send_uuids(mdev); - drbd_send_state(mdev); - } - - /* We want to pause/continue resync, tell peer. */ - if (ns.conn >= C_CONNECTED && - ((os.aftr_isp != ns.aftr_isp) || - (os.user_isp != ns.user_isp))) - drbd_send_state(mdev); - - /* In case one of the isp bits got set, suspend other devices. */ - if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && - (ns.aftr_isp || ns.peer_isp || ns.user_isp)) - suspend_other_sg(mdev); - - /* Make sure the peer gets informed about eventual state - changes (ISP bits) while we were in WFReportParams. */ - if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(mdev); - - if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(mdev); - - /* We are in the progress to start a full sync... 
*/ - if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || - (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) - /* no other bitmap changes expected during this phase */ - drbd_queue_bitmap_io(mdev, - &drbd_bmio_set_n_write, &abw_start_sync, - "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); - - /* We are invalidating our self... */ - if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && - os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) - /* other bitmap operation expected during this phase */ - drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, - "set_n_write from invalidate", BM_LOCKED_MASK); - - /* first half of local IO error, failure to attach, - * or administrative detach */ - if (os.disk != D_FAILED && ns.disk == D_FAILED) { - enum drbd_io_error_p eh; - int was_io_error; - /* corresponding get_ldev was in __drbd_set_state, to serialize - * our cleanup here with the transition to D_DISKLESS, - * so it is safe to dreference ldev here. */ - eh = mdev->ldev->dc.on_io_error; - was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - - /* current state still has to be D_FAILED, - * there is only one way out: to D_DISKLESS, - * and that may only happen after our put_ldev below. */ - if (mdev->state.disk != D_FAILED) - dev_err(DEV, - "ASSERT FAILED: disk is %s during detach\n", - drbd_disk_str(mdev->state.disk)); - - if (drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I am detaching my disk\n"); - else - dev_err(DEV, "Sending state for detaching disk failed\n"); - - drbd_rs_cancel_all(mdev); - - /* In case we want to get something to stable storage still, - * this may be the last chance. - * Following put_ldev may transition to D_DISKLESS. 
*/ - drbd_md_sync(mdev); - put_ldev(mdev); - - if (was_io_error && eh == EP_CALL_HELPER) - drbd_khelper(mdev, "local-io-error"); - } - - /* second half of local IO error, failure to attach, - * or administrative detach, - * after local_cnt references have reached zero again */ - if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { - /* We must still be diskless, - * re-attach has to be serialized with this! */ - if (mdev->state.disk != D_DISKLESS) - dev_err(DEV, - "ASSERT FAILED: disk is %s while going diskless\n", - drbd_disk_str(mdev->state.disk)); - - mdev->rs_total = 0; - mdev->rs_failed = 0; - atomic_set(&mdev->rs_pending_cnt, 0); - - if (drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I'm now diskless.\n"); - /* corresponding get_ldev in __drbd_set_state - * this may finally trigger drbd_ldev_destroy. */ - put_ldev(mdev); - } - - /* Notify peer that I had a local IO error, and did not detached.. */ - if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) - drbd_send_state(mdev); - - /* Disks got bigger while they were detached */ - if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && - test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { - if (ns.conn == C_CONNECTED) - resync_after_online_grow(mdev); - } - - /* A resync finished or aborted, wake paused devices... */ - if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || - (os.peer_isp && !ns.peer_isp) || - (os.user_isp && !ns.user_isp)) - resume_next_sg(mdev); - - /* sync target done with resync. Explicitly notify peer, even though - * it should (at least for non-empty resyncs) already know itself. */ - if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(mdev); - - /* This triggers bitmap writeout of potentially still unwritten pages - * if the resync finished cleanly, or aborted because of peer disk - * failure, or because of connection loss. 
- * For resync aborted because of local disk failure, we cannot do - * any bitmap writeout anymore. - * No harm done if some bits change during this phase. - */ - if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, - "write from resync_finished", BM_LOCKED_SET_ALLOWED); - put_ldev(mdev); - } - - /* Upon network connection, we need to start the receiver */ - if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&mdev->tconn->receiver); - - /* Terminate worker thread if we are unconfigured - it will be - restarted as needed... */ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY) { - if (os.aftr_isp != ns.aftr_isp) - resume_next_sg(mdev); - /* set in __drbd_set_state, unless CONFIG_PENDING was set */ - if (test_bit(DEVICE_DYING, &mdev->flags)) - drbd_thread_stop_nowait(&mdev->tconn->worker); - } - - drbd_md_sync(mdev); -} - - static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c new file mode 100644 index 00000000000..38d330b7b66 --- /dev/null +++ b/drivers/block/drbd/drbd_state.c @@ -0,0 +1,1217 @@ +/* + drbd_state.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev + from Logicworks, Inc. for making SDP replication support possible. + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include "drbd_int.h" +#include "drbd_req.h" + +struct after_state_chg_work { + struct drbd_work w; + union drbd_state os; + union drbd_state ns; + enum chg_state_flags flags; + struct completion *done; +}; + + +extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); +int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags); +static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags); +static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort); + +/** + * cl_wide_st_chg() - true if the state change is a cluster wide one + * @mdev: DRBD device. + * @os: old (current) state. + * @ns: new (wanted) state. 
+ */ +static int cl_wide_st_chg(struct drbd_conf *mdev, + union drbd_state os, union drbd_state ns) +{ + return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && + ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || + (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || + (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || + (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); +} + +enum drbd_state_rv +drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state mask, union drbd_state val) +{ + unsigned long flags; + union drbd_state os, ns; + enum drbd_state_rv rv; + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, NULL); + ns = mdev->state; + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + return rv; +} + +/** + * drbd_force_state() - Impose a change which happens outside our control on our state + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. 
+ */ +void drbd_force_state(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + drbd_change_state(mdev, CS_HARD, mask, val); +} + +static enum drbd_state_rv +_req_st_cond(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val) +{ + union drbd_state os, ns; + unsigned long flags; + enum drbd_state_rv rv; + + if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) + return SS_CW_SUCCESS; + + if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) + return SS_CW_FAILED_BY_PEER; + + rv = 0; + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + ns = sanitize_state(mdev, os, ns, NULL); + + if (!cl_wide_st_chg(mdev, os, ns)) + rv = SS_CW_NO_NEED; + if (!rv) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) { + rv = is_valid_soft_transition(os, ns); + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ + } + } + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + return rv; +} + +/** + * drbd_req_state() - Perform an eventually cluster wide state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * @f: flags + * + * Should not be called directly, use drbd_request_state() or + * _drbd_request_state(). 
+ */ +static enum drbd_state_rv +drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + struct completion done; + unsigned long flags; + union drbd_state os, ns; + enum drbd_state_rv rv; + + init_completion(&done); + + if (f & CS_SERIALIZE) + mutex_lock(&mdev->state_mutex); + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + + ns = sanitize_state(mdev, os, ns, NULL); + + if (cl_wide_st_chg(mdev, os, ns)) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) + rv = is_valid_soft_transition(os, ns); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + if (rv < SS_SUCCESS) { + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + drbd_state_lock(mdev); + if (!drbd_send_state_req(mdev, mask, val)) { + drbd_state_unlock(mdev); + rv = SS_CW_FAILED_BY_PEER; + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + wait_event(mdev->state_wait, + (rv = _req_st_cond(mdev, mask, val))); + + if (rv < SS_SUCCESS) { + drbd_state_unlock(mdev); + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, &done); + drbd_state_unlock(mdev); + } else { + rv = _drbd_set_state(mdev, ns, f, &done); + } + + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { + D_ASSERT(current != mdev->tconn->worker.task); + wait_for_completion(&done); + } + +abort: + if (f & CS_SERIALIZE) + mutex_unlock(&mdev->state_mutex); + + return rv; +} + +/** + * _drbd_request_state() - Request a state change (with flags) + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. 
+ * @f: flags + * + * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE + * flag, or when logging of failed state change requests is not desired. + */ +enum drbd_state_rv +_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + enum drbd_state_rv rv; + + wait_event(mdev->state_wait, + (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); + + return rv; +} + +static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) +{ + dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", + name, + drbd_conn_str(ns.conn), + drbd_role_str(ns.role), + drbd_role_str(ns.peer), + drbd_disk_str(ns.disk), + drbd_disk_str(ns.pdsk), + is_susp(ns) ? 's' : 'r', + ns.aftr_isp ? 'a' : '-', + ns.peer_isp ? 'p' : '-', + ns.user_isp ? 'u' : '-', + ns.susp_fen ? 'F' : '-', + ns.susp_nod ? 'N' : '-' + ); +} + +void print_st_err(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum drbd_state_rv err) +{ + if (err == SS_IN_TRANSIENT_STATE) + return; + dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); + print_st(mdev, " state", os); + print_st(mdev, "wanted", ns); +} + + +/** + * is_valid_state() - Returns an SS_ error code if ns is not valid + * @mdev: DRBD device. + * @ns: State to consider. 
+ */ +static enum drbd_state_rv +is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +{ + /* See drbd_state_sw_errors in drbd_strings.c */ + + enum drbd_fencing_p fp; + enum drbd_state_rv rv = SS_SUCCESS; + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + if (get_net_conf(mdev->tconn)) { + if (!mdev->tconn->net_conf->two_primaries && + ns.role == R_PRIMARY && ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; + put_net_conf(mdev->tconn); + } + + if (rv <= 0) + /* already found a reason to abort */; + else if (ns.role == R_SECONDARY && mdev->open_cnt) + rv = SS_DEVICE_IN_USE; + + else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (fp >= FP_RESOURCE && + ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) + rv = SS_PRIMARY_NOP; + + else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) + rv = SS_NO_LOCAL_DISK; + + else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) + rv = SS_NO_REMOTE_DISK; + + else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if ((ns.conn == C_CONNECTED || + ns.conn == C_WF_BITMAP_S || + ns.conn == C_SYNC_SOURCE || + ns.conn == C_PAUSED_SYNC_S) && + ns.disk == D_OUTDATED) + rv = SS_CONNECTED_OUTDATES; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + (mdev->sync_conf.verify_alg[0] == 0)) + rv = SS_NO_VERIFY_ALG; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + mdev->tconn->agreed_pro_version < 88) + rv = SS_NOT_SUPPORTED; + + else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) + rv = SS_CONNECTED_OUTDATES; + + return rv; +} + +/** + * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible + * @mdev: DRBD device. 
+ * @ns: new state. + * @os: old state. + */ +static enum drbd_state_rv +is_valid_soft_transition(union drbd_state os, union drbd_state ns) +{ + enum drbd_state_rv rv = SS_SUCCESS; + + if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && + os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) + rv = SS_ALREADY_STANDALONE; + + if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; + + if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) + rv = SS_NO_NET_CONFIG; + + if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) + rv = SS_LOWER_THAN_OUTDATED; + + if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) + rv = SS_IN_TRANSIENT_STATE; + + if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) + rv = SS_IN_TRANSIENT_STATE; + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + ns.conn != os.conn && os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && + os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) + && os.conn < C_WF_REPORT_PARAMS) + rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ + + return rv; +} + +/** + * sanitize_state() - Resolves implicitly necessary additional changes to a state transition + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. + * @warn_sync_abort: + * + * When we loose connection, we have to set the state of the peers disk (pdsk) + * to D_UNKNOWN. This rule and many more along those lines are in this function. 
+ */ +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort) +{ + enum drbd_fencing_p fp; + enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + /* Disallow Network errors to configure a device's network part */ + if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && + os.conn <= C_DISCONNECTING) + ns.conn = os.conn; + + /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. + * If you try to go into some Sync* state, that shall fail (elsewhere). */ + if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && + ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) + ns.conn = os.conn; + + /* we cannot fail (again) if we already detached */ + if (ns.disk == D_FAILED && os.disk == D_DISKLESS) + ns.disk = D_DISKLESS; + + /* if we are only D_ATTACHING yet, + * we can (and should) go directly to D_DISKLESS. */ + if (ns.disk == D_FAILED && os.disk == D_ATTACHING) + ns.disk = D_DISKLESS; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) + ns.conn = os.conn; + + if (ns.conn < C_CONNECTED) { + ns.peer_isp = 0; + ns.peer = R_UNKNOWN; + if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) + ns.pdsk = D_UNKNOWN; + } + + /* Clear the aftr_isp when becoming unconfigured */ + if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) + ns.aftr_isp = 0; + + /* Abort resync if a disk fails/detaches */ + if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && + (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { + if (warn_sync_abort) + *warn_sync_abort = + os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? 
+ "Online-verify" : "Resync"; + ns.conn = C_CONNECTED; + } + + /* Connection breaks down before we finished "Negotiating" */ + if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && + get_ldev_if_state(mdev, D_NEGOTIATING)) { + if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { + ns.disk = mdev->new_state_tmp.disk; + ns.pdsk = mdev->new_state_tmp.pdsk; + } else { + dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + ns.disk = D_DISKLESS; + ns.pdsk = D_UNKNOWN; + } + put_ldev(mdev); + } + + /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ + if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { + if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) + ns.disk = D_UP_TO_DATE; + if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) + ns.pdsk = D_UP_TO_DATE; + } + + /* Implications of the connection stat on the disk states */ + disk_min = D_DISKLESS; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_UNKNOWN; + switch ((enum drbd_conns)ns.conn) { + case C_WF_BITMAP_T: + case C_PAUSED_SYNC_T: + case C_STARTING_SYNC_T: + case C_WF_SYNC_UUID: + case C_BEHIND: + disk_min = D_INCONSISTENT; + disk_max = D_OUTDATED; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_VERIFY_S: + case C_VERIFY_T: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_CONNECTED: + disk_min = D_DISKLESS; + disk_max = D_UP_TO_DATE; + pdsk_min = D_DISKLESS; + pdsk_max = D_UP_TO_DATE; + break; + case C_WF_BITMAP_S: + case C_PAUSED_SYNC_S: + case C_STARTING_SYNC_S: + case C_AHEAD: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. 
But explicit outdate necessary*/ + break; + case C_SYNC_TARGET: + disk_min = D_INCONSISTENT; + disk_max = D_INCONSISTENT; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_SYNC_SOURCE: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_INCONSISTENT; + break; + case C_STANDALONE: + case C_DISCONNECTING: + case C_UNCONNECTED: + case C_TIMEOUT: + case C_BROKEN_PIPE: + case C_NETWORK_FAILURE: + case C_PROTOCOL_ERROR: + case C_TEAR_DOWN: + case C_WF_CONNECTION: + case C_WF_REPORT_PARAMS: + case C_MASK: + break; + } + if (ns.disk > disk_max) + ns.disk = disk_max; + + if (ns.disk < disk_min) { + dev_warn(DEV, "Implicitly set disk from %s to %s\n", + drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); + ns.disk = disk_min; + } + if (ns.pdsk > pdsk_max) + ns.pdsk = pdsk_max; + + if (ns.pdsk < pdsk_min) { + dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", + drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); + ns.pdsk = pdsk_min; + } + + if (fp == FP_STONITH && + (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && + !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) + ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ + + if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && + !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) + ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ + + if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { + if (ns.conn == C_SYNC_SOURCE) + ns.conn = C_PAUSED_SYNC_S; + if (ns.conn == C_SYNC_TARGET) + ns.conn = C_PAUSED_SYNC_T; + } else { + if (ns.conn == C_PAUSED_SYNC_S) + ns.conn = C_SYNC_SOURCE; + if (ns.conn == C_PAUSED_SYNC_T) + ns.conn = C_SYNC_TARGET; + } + + return ns; +} + +void drbd_resume_al(struct drbd_conf *mdev) +{ + if (test_and_clear_bit(AL_SUSPENDED, 
&mdev->flags)) + dev_info(DEV, "Resumed AL updates\n"); +} + +/* helper for __drbd_set_state */ +static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) +{ + if (mdev->tconn->agreed_pro_version < 90) + mdev->ov_start_sector = 0; + mdev->rs_total = drbd_bm_bits(mdev); + mdev->ov_position = 0; + if (cs == C_VERIFY_T) { + /* starting online verify from an arbitrary position + * does not fit well into the existing protocol. + * on C_VERIFY_T, we initialize ov_left and friends + * implicitly in receive_DataRequest once the + * first P_OV_REQUEST is received */ + mdev->ov_start_sector = ~(sector_t)0; + } else { + unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); + if (bit >= mdev->rs_total) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(mdev->rs_total - 1); + mdev->rs_total = 1; + } else + mdev->rs_total -= bit; + mdev->ov_position = mdev->ov_start_sector; + } + mdev->ov_left = mdev->rs_total; +} + +/** + * __drbd_set_state() - Set a new DRBD state + * @mdev: DRBD device. + * @ns: new state. + * @flags: Flags + * @done: Optional completion, that will get completed after the after_state_ch() finished + * + * Caller needs to hold req_lock, and global_state_lock. Do not call directly. 
+ */ +enum drbd_state_rv +__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, + enum chg_state_flags flags, struct completion *done) +{ + union drbd_state os; + enum drbd_state_rv rv = SS_SUCCESS; + const char *warn_sync_abort = NULL; + struct after_state_chg_work *ascw; + + os = mdev->state; + + ns = sanitize_state(mdev, os, ns, &warn_sync_abort); + + if (ns.i == os.i) + return SS_NOTHING_TO_DO; + + if (!(flags & CS_HARD)) { + /* pre-state-change checks ; only look at ns */ + /* See drbd_state_sw_errors in drbd_strings.c */ + + rv = is_valid_state(mdev, ns); + if (rv < SS_SUCCESS) { + /* If the old state was illegal as well, then let + this happen...*/ + + if (is_valid_state(mdev, os) == rv) + rv = is_valid_soft_transition(os, ns); + } else + rv = is_valid_soft_transition(os, ns); + } + + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + return rv; + } + + if (warn_sync_abort) + dev_warn(DEV, "%s aborted.\n", warn_sync_abort); + + { + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + if (ns.role != os.role) + pbp += sprintf(pbp, "role( %s -> %s ) ", + drbd_role_str(os.role), + drbd_role_str(ns.role)); + if (ns.peer != os.peer) + pbp += sprintf(pbp, "peer( %s -> %s ) ", + drbd_role_str(os.peer), + drbd_role_str(ns.peer)); + if (ns.conn != os.conn) + pbp += sprintf(pbp, "conn( %s -> %s ) ", + drbd_conn_str(os.conn), + drbd_conn_str(ns.conn)); + if (ns.disk != os.disk) + pbp += sprintf(pbp, "disk( %s -> %s ) ", + drbd_disk_str(os.disk), + drbd_disk_str(ns.disk)); + if (ns.pdsk != os.pdsk) + pbp += sprintf(pbp, "pdsk( %s -> %s ) ", + drbd_disk_str(os.pdsk), + drbd_disk_str(ns.pdsk)); + if (is_susp(ns) != is_susp(os)) + pbp += sprintf(pbp, "susp( %d -> %d ) ", + is_susp(os), + is_susp(ns)); + if (ns.aftr_isp != os.aftr_isp) + pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", + os.aftr_isp, + ns.aftr_isp); + if (ns.peer_isp != os.peer_isp) + pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", + os.peer_isp, + ns.peer_isp); + if (ns.user_isp 
!= os.user_isp) + pbp += sprintf(pbp, "user_isp( %d -> %d ) ", + os.user_isp, + ns.user_isp); + dev_info(DEV, "%s\n", pb); + } + + /* solve the race between becoming unconfigured, + * worker doing the cleanup, and + * admin reconfiguring us: + * on (re)configure, first set CONFIG_PENDING, + * then wait for a potentially exiting worker, + * start the worker, and schedule one no_op. + * then proceed with configuration. + */ + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY && + !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) + set_bit(DEVICE_DYING, &mdev->flags); + + /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference + * on the ldev here, to be sure the transition -> D_DISKLESS resp. + * drbd_ldev_destroy() won't happen before our corresponding + * after_state_ch works run, where we put_ldev again. */ + if ((os.disk != D_FAILED && ns.disk == D_FAILED) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) + atomic_inc(&mdev->local_cnt); + + mdev->state = ns; + + if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) + drbd_print_uuids(mdev, "attached to UUIDs"); + + wake_up(&mdev->misc_wait); + wake_up(&mdev->state_wait); + + /* aborted verify run. 
log the last position */ + if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && + ns.conn < C_CONNECTED) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); + } + + if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { + dev_info(DEV, "Syncer continues.\n"); + mdev->rs_paused += (long)jiffies + -(long)mdev->rs_mark_time[mdev->rs_last_mark]; + if (ns.conn == C_SYNC_TARGET) + mod_timer(&mdev->resync_timer, jiffies); + } + + if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && + (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { + dev_info(DEV, "Resync suspended\n"); + mdev->rs_mark_time[mdev->rs_last_mark] = jiffies; + } + + if (os.conn == C_CONNECTED && + (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { + unsigned long now = jiffies; + int i; + + set_ov_position(mdev, ns.conn); + mdev->rs_start = now; + mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; + mdev->ov_last_oos_size = 0; + mdev->ov_last_oos_start = 0; + + for (i = 0; i < DRBD_SYNC_MARKS; i++) { + mdev->rs_mark_left[i] = mdev->ov_left; + mdev->rs_mark_time[i] = now; + } + + drbd_rs_controller_reset(mdev); + + if (ns.conn == C_VERIFY_S) { + dev_info(DEV, "Starting Online Verify from sector %llu\n", + (unsigned long long)mdev->ov_position); + mod_timer(&mdev->resync_timer, jiffies); + } + } + + if (get_ldev(mdev)) { + u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| + MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| + MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); + + if (test_bit(CRASHED_PRIMARY, &mdev->flags)) + mdf |= MDF_CRASHED_PRIMARY; + if (mdev->state.role == R_PRIMARY || + (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) + mdf |= MDF_PRIMARY_IND; + if (mdev->state.conn > C_WF_REPORT_PARAMS) + mdf |= MDF_CONNECTED_IND; + if (mdev->state.disk > 
D_INCONSISTENT) + mdf |= MDF_CONSISTENT; + if (mdev->state.disk > D_OUTDATED) + mdf |= MDF_WAS_UP_TO_DATE; + if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) + mdf |= MDF_PEER_OUT_DATED; + if (mdf != mdev->ldev->md.flags) { + mdev->ldev->md.flags = mdf; + drbd_md_mark_dirty(mdev); + } + if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) + drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); + put_ldev(mdev); + } + + /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ + if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && + os.peer == R_SECONDARY && ns.peer == R_PRIMARY) + set_bit(CONSIDER_RESYNC, &mdev->flags); + + /* Receiver should clean up itself */ + if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) + drbd_thread_stop_nowait(&mdev->tconn->receiver); + + /* Now the receiver finished cleaning up itself, it should die */ + if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) + drbd_thread_stop_nowait(&mdev->tconn->receiver); + + /* Upon network failure, we need to restart the receiver. 
*/ + if (os.conn > C_TEAR_DOWN && + ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) + drbd_thread_restart_nowait(&mdev->tconn->receiver); + + /* Resume AL writing if we get a connection */ + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + drbd_resume_al(mdev); + + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); + if (ascw) { + ascw->os = os; + ascw->ns = ns; + ascw->flags = flags; + ascw->w.cb = w_after_state_ch; + ascw->done = done; + drbd_queue_work(&mdev->tconn->data.work, &ascw->w); + } else { + dev_warn(DEV, "Could not kmalloc an ascw\n"); + } + + return rv; +} + +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct after_state_chg_work *ascw = + container_of(w, struct after_state_chg_work, w); + + after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); + if (ascw->flags & CS_WAIT_COMPLETE) { + D_ASSERT(ascw->done != NULL); + complete(ascw->done); + } + kfree(ascw); + + return 1; +} + +static void abw_start_sync(struct drbd_conf *mdev, int rv) +{ + if (rv) { + dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); + _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); + return; + } + + switch (mdev->state.conn) { + case C_STARTING_SYNC_T: + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + break; + case C_STARTING_SYNC_S: + drbd_start_resync(mdev, C_SYNC_SOURCE); + break; + } +} + +int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags) +{ + int rv; + + D_ASSERT(current == mdev->tconn->worker.task); + + /* open coded non-blocking drbd_suspend_io(mdev); */ + set_bit(SUSPEND_IO, &mdev->flags); + + drbd_bm_lock(mdev, why, flags); + rv = io_fn(mdev); + drbd_bm_unlock(mdev); + + drbd_resume_io(mdev); + + return rv; +} + +/** + * after_state_ch() - Perform after state change actions that may sleep + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. 
+ * @flags: Flags + */ +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags) +{ + enum drbd_fencing_p fp; + enum drbd_req_event what = NOTHING; + union drbd_state nsm = (union drbd_state){ .i = -1 }; + + if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { + clear_bit(CRASHED_PRIMARY, &mdev->flags); + if (mdev->p_uuid) + mdev->p_uuid[UI_FLAGS] &= ~((u64)2); + } + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + /* Inform userspace about the change... */ + drbd_bcast_state(mdev, ns); + + if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) + drbd_khelper(mdev, "pri-on-incon-degr"); + + /* Here we have the actions that are performed after a + state change. This function might sleep */ + + nsm.i = -1; + if (ns.susp_nod) { + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + what = RESEND; + + if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) + what = RESTART_FROZEN_DISK_IO; + + if (what != NOTHING) + nsm.susp_nod = 0; + } + + if (ns.susp_fen) { + /* case1: The outdate peer handler is successful: */ + if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { + tl_clear(mdev); + if (test_bit(NEW_CUR_UUID, &mdev->flags)) { + drbd_uuid_new_current(mdev); + clear_bit(NEW_CUR_UUID, &mdev->flags); + } + spin_lock_irq(&mdev->tconn->req_lock); + _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->tconn->req_lock); + } + /* case2: The connection was established again: */ + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { + clear_bit(NEW_CUR_UUID, &mdev->flags); + what = RESEND; + nsm.susp_fen = 0; + } + } + + if (what != NOTHING) { + spin_lock_irq(&mdev->tconn->req_lock); + _tl_restart(mdev, what); + nsm.i &= mdev->state.i; + _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); + 
spin_unlock_irq(&mdev->tconn->req_lock); + } + + /* Became sync source. With protocol >= 96, we still need to send out + * the sync uuid now. Need to do that before any drbd_send_state, or + * the other side may go "paused sync" before receiving the sync uuids, + * which is unexpected. */ + if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && + mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { + drbd_gen_and_send_sync_uuid(mdev); + put_ldev(mdev); + } + + /* Do not change the order of the if above and the two below... */ + if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + /* No point in queuing send_bitmap if we don't have a connection + * anymore, so check also the _current_ state, not only the new state + * at the time this work was queued. */ + if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && + mdev->state.conn == C_WF_BITMAP_S) + drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, + "send_bitmap (WFBitMapS)", + BM_LOCKED_TEST_ALLOWED); + + /* Lost contact to peer's copy of the data */ + if ((os.pdsk >= D_INCONSISTENT && + os.pdsk != D_UNKNOWN && + os.pdsk != D_OUTDATED) + && (ns.pdsk < D_INCONSISTENT || + ns.pdsk == D_UNKNOWN || + ns.pdsk == D_OUTDATED)) { + if (get_ldev(mdev)) { + if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + if (is_susp(mdev->state)) { + set_bit(NEW_CUR_UUID, &mdev->flags); + } else { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + } + put_ldev(mdev); + } + } + + if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + + /* D_DISKLESS Peer becomes secondary */ + if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) + /* We may still be Primary ourselves. 
+ * No harm done if the bitmap still changes, + * redirtied pages will follow later. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote diskless peer", BM_LOCKED_SET_ALLOWED); + put_ldev(mdev); + } + + /* Write out all changed bits on demote. + * Though, no need to do that just yet + * if there is a resync going on still */ + if (os.role == R_PRIMARY && ns.role == R_SECONDARY && + mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { + /* No changes to the bitmap expected this time, so assert that, + * even though no harm was done if it did change. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote", BM_LOCKED_TEST_ALLOWED); + put_ldev(mdev); + } + + /* Last part of the attaching process ... */ + if (ns.conn >= C_CONNECTED && + os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { + drbd_send_sizes(mdev, 0, 0); /* to start sync... */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + + /* We want to pause/continue resync, tell peer. */ + if (ns.conn >= C_CONNECTED && + ((os.aftr_isp != ns.aftr_isp) || + (os.user_isp != ns.user_isp))) + drbd_send_state(mdev); + + /* In case one of the isp bits got set, suspend other devices. */ + if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && + (ns.aftr_isp || ns.peer_isp || ns.user_isp)) + suspend_other_sg(mdev); + + /* Make sure the peer gets informed about eventual state + changes (ISP bits) while we were in WFReportParams. */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) + drbd_send_state(mdev); + + if (os.conn != C_AHEAD && ns.conn == C_AHEAD) + drbd_send_state(mdev); + + /* We are in the progress to start a full sync... 
*/ + if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) + /* no other bitmap changes expected during this phase */ + drbd_queue_bitmap_io(mdev, + &drbd_bmio_set_n_write, &abw_start_sync, + "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); + + /* We are invalidating our self... */ + if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && + os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) + /* other bitmap operation expected during this phase */ + drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, + "set_n_write from invalidate", BM_LOCKED_MASK); + + /* first half of local IO error, failure to attach, + * or administrative detach */ + if (os.disk != D_FAILED && ns.disk == D_FAILED) { + enum drbd_io_error_p eh; + int was_io_error; + /* corresponding get_ldev was in __drbd_set_state, to serialize + * our cleanup here with the transition to D_DISKLESS, + * so it is safe to dereference ldev here. */ + eh = mdev->ldev->dc.on_io_error; + was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + + /* current state still has to be D_FAILED, + * there is only one way out: to D_DISKLESS, + * and that may only happen after our put_ldev below. */ + if (mdev->state.disk != D_FAILED) + dev_err(DEV, + "ASSERT FAILED: disk is %s during detach\n", + drbd_disk_str(mdev->state.disk)); + + if (drbd_send_state(mdev)) + dev_warn(DEV, "Notified peer that I am detaching my disk\n"); + else + dev_err(DEV, "Sending state for detaching disk failed\n"); + + drbd_rs_cancel_all(mdev); + + /* In case we want to get something to stable storage still, + * this may be the last chance. + * Following put_ldev may transition to D_DISKLESS. 
*/ + drbd_md_sync(mdev); + put_ldev(mdev); + + if (was_io_error && eh == EP_CALL_HELPER) + drbd_khelper(mdev, "local-io-error"); + } + + /* second half of local IO error, failure to attach, + * or administrative detach, + * after local_cnt references have reached zero again */ + if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { + /* We must still be diskless, + * re-attach has to be serialized with this! */ + if (mdev->state.disk != D_DISKLESS) + dev_err(DEV, + "ASSERT FAILED: disk is %s while going diskless\n", + drbd_disk_str(mdev->state.disk)); + + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + + if (drbd_send_state(mdev)) + dev_warn(DEV, "Notified peer that I'm now diskless.\n"); + /* corresponding get_ldev in __drbd_set_state + * this may finally trigger drbd_ldev_destroy. */ + put_ldev(mdev); + } + + /* Notify peer that I had a local IO error, and did not detach.. */ + if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) + drbd_send_state(mdev); + + /* Disks got bigger while they were detached */ + if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && + test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { + if (ns.conn == C_CONNECTED) + resync_after_online_grow(mdev); + } + + /* A resync finished or aborted, wake paused devices... */ + if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || + (os.peer_isp && !ns.peer_isp) || + (os.user_isp && !ns.user_isp)) + resume_next_sg(mdev); + + /* sync target done with resync. Explicitly notify peer, even though + * it should (at least for non-empty resyncs) already know itself. */ + if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) + drbd_send_state(mdev); + + /* This triggers bitmap writeout of potentially still unwritten pages + * if the resync finished cleanly, or aborted because of peer disk + * failure, or because of connection loss. 
+ * For resync aborted because of local disk failure, we cannot do + * any bitmap writeout anymore. + * No harm done if some bits change during this phase. + */ + if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { + drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, + "write from resync_finished", BM_LOCKED_SET_ALLOWED); + put_ldev(mdev); + } + + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { + if (os.aftr_isp != ns.aftr_isp) + resume_next_sg(mdev); + } + + after_conn_state_ch(mdev->tconn, os, ns, flags); + drbd_md_sync(mdev); +} + +static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags) +{ + /* Upon network configuration, we need to start the receiver */ + if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) + drbd_thread_start(&tconn->receiver); + + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { + /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */ + drbd_thread_stop_nowait(&tconn->worker); + } +} diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h new file mode 100644 index 00000000000..3ec26e2c4c4 --- /dev/null +++ b/drivers/block/drbd/drbd_state.h @@ -0,0 +1,101 @@ +#ifndef DRBD_STATE_H +#define DRBD_STATE_H + +struct drbd_conf; + +/** + * DOC: DRBD State macros + * + * These macros are used to express state changes in easily readable form. + * + * The NS macros expand to a mask and a value, that can be bit ored onto the + * current state as soon as the spinlock (req_lock) was taken. + * + * The _NS macros are used for state functions that get called with the + * spinlock. These macros expand directly to the new state value. + * + * Besides the basic forms NS() and _NS() additional _?NS[23] are defined + * to express state changes that affect more than one aspect of the state. + * + * E.g. 
NS2(conn, C_CONNECTED, peer, R_SECONDARY) + * Means that the network connection was established and that the peer + * is in secondary role. + */ +#define role_MASK R_MASK +#define peer_MASK R_MASK +#define disk_MASK D_MASK +#define pdsk_MASK D_MASK +#define conn_MASK C_MASK +#define susp_MASK 1 +#define user_isp_MASK 1 +#define aftr_isp_MASK 1 +#define susp_nod_MASK 1 +#define susp_fen_MASK 1 + +#define NS(T, S) \ + ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T = (S); val; }) +#define NS2(T1, S1, T2, S2) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val; }) +#define NS3(T1, S1, T2, S2, T3, S3) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val.T3 = (S3); val; }) + +#define _NS(D, T, S) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) +#define _NS2(D, T1, S1, T2, S2) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns; }) +#define _NS3(D, T1, S1, T2, S2, T3, S3) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) + +enum chg_state_flags { + CS_HARD = 1, + CS_VERBOSE = 2, + CS_WAIT_COMPLETE = 4, + CS_SERIALIZE = 8, + CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, +}; + +extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, + enum chg_state_flags f, + union drbd_state mask, + union drbd_state val); +extern void drbd_force_state(struct drbd_conf *, union drbd_state, + union drbd_state); +extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, + union drbd_state, + union drbd_state, + enum chg_state_flags); +extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union 
drbd_state, + enum chg_state_flags, + struct completion *done); +extern void print_st_err(struct drbd_conf *, union drbd_state, + union drbd_state, int); + +extern void drbd_resume_al(struct drbd_conf *mdev); + +/** + * drbd_request_state() - Request a state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * + * This is the most graceful way of requesting a state change. It is + * quite verbose in case the state change is not possible, and all those + * state changes are globally serialized. + */ +static inline int drbd_request_state(struct drbd_conf *mdev, + union drbd_state mask, + union drbd_state val) +{ + return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); +} + +#endif From bed879ae905190028a90d53493c4f75dcd78f44d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 4 Feb 2011 14:00:37 +0100 Subject: [PATCH 062/609] drbd: Moved the thread name into the data structure Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 20 +++++--------- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_main.c | 46 +++++++++++++++++++------------- 3 files changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4da4c322fa5..e85221f22ad 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - current == mdev->tconn->receiver.task ? "receiver" : - current == mdev->tconn->asender.task ? "asender" : - current == mdev->tconn->worker.task ? "worker" : current->comm, - func, b->bm_why ?: "?", - b->bm_task == mdev->tconn->receiver.task ? "receiver" : - b->bm_task == mdev->tconn->asender.task ? 
"asender" : - b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); + drbd_task_to_thread_name(mdev, current), + func, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev, b->bm_task)); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - current == mdev->tconn->receiver.task ? "receiver" : - current == mdev->tconn->asender.task ? "asender" : - current == mdev->tconn->worker.task ? "worker" : current->comm, - why, b->bm_why ?: "?", - b->bm_task == mdev->tconn->receiver.task ? "receiver" : - b->bm_task == mdev->tconn->asender.task ? "asender" : - b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); + drbd_task_to_thread_name(mdev, current), + why, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev, b->bm_task)); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 98addab2c92..7beb374451b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -616,6 +616,7 @@ struct drbd_thread { int (*function) (struct drbd_thread *); struct drbd_conf *mdev; int reset_cpu_mask; + char name[9]; }; static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) @@ -1130,6 +1131,7 @@ enum dds_flags { extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); +extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4b39b3d0dd5..852a3e3fbb7 100644 --- 
a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -462,7 +462,7 @@ restart: */ if (thi->t_state == RESTARTING) { - dev_info(DEV, "Restarting %s\n", current->comm); + dev_info(DEV, "Restarting %s thread\n", thi->name); thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; @@ -482,13 +482,14 @@ restart: } static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, - int (*func) (struct drbd_thread *)) + int (*func) (struct drbd_thread *), char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; thi->t_state = NONE; thi->function = func; thi->mdev = mdev; + strncpy(thi->name, name, ARRAY_SIZE(thi->name)); } int drbd_thread_start(struct drbd_thread *thi) @@ -497,11 +498,6 @@ int drbd_thread_start(struct drbd_thread *thi) struct task_struct *nt; unsigned long flags; - const char *me = - thi == &mdev->tconn->receiver ? "receiver" : - thi == &mdev->tconn->asender ? "asender" : - thi == &mdev->tconn->worker ? "worker" : "NONSENSE"; - /* is used from state engine doing drbd_thread_stop_nowait, * while holding the req lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); @@ -509,7 +505,7 @@ int drbd_thread_start(struct drbd_thread *thi) switch (thi->t_state) { case NONE: dev_info(DEV, "Starting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { @@ -526,7 +522,7 @@ int drbd_thread_start(struct drbd_thread *thi) flush_signals(current); /* otherw. 
may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, - "drbd%d_%s", mdev_to_minor(mdev), me); + "drbd%d_%s", mdev_to_minor(mdev), thi->name); if (IS_ERR(nt)) { dev_err(DEV, "Couldn't start thread\n"); @@ -543,7 +539,7 @@ int drbd_thread_start(struct drbd_thread *thi) case EXITING: thi->t_state = RESTARTING; dev_info(DEV, "Restarting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + thi->name, current->comm, current->pid); /* fall through */ case RUNNING: case RESTARTING: @@ -592,6 +588,23 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) wait_for_completion(&thi->stop); } +static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct task_struct *task) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_thread *thi = + task == tconn->receiver.task ? &tconn->receiver : + task == tconn->asender.task ? &tconn->asender : + task == tconn->worker.task ? &tconn->worker : NULL; + + return thi; +} + +char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) +{ + struct drbd_thread *thi = drbd_task_to_thread(mdev, task); + return thi ? thi->name : task->comm; +} + #ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs @@ -629,11 +642,8 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev) void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { struct task_struct *p = current; - struct drbd_thread *thi = - p == mdev->tconn->asender.task ? &mdev->tconn->asender : - p == mdev->tconn->receiver.task ? &mdev->tconn->receiver : - p == mdev->tconn->worker.task ? 
&mdev->tconn->worker : - NULL; + struct drbd_thread *thi = drbd_task_to_thread(mdev, p); + if (!expect(thi != NULL)) return; if (!thi->reset_cpu_mask) @@ -1848,9 +1858,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init); - drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); - drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); + drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init, "receiver"); + drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker, "worker"); + drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender, "asender"); /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; From bc31fe3352f9cd76195ce6eb638dfc2dac17dc2e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 11:14:38 +0100 Subject: [PATCH 063/609] drbd: Eliminated the user of drbd_task_to_thread() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 6 ++---- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7beb374451b..9a351a2cab7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1133,10 +1133,10 @@ extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); #ifdef CONFIG_SMP -extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); +extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi); extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); #else -#define 
drbd_thread_current_set_cpu(A) ({}) +#define drbd_thread_current_set_cpu(A, B) ({}) #define drbd_calc_cpu_mask(A) ({}) #endif extern void drbd_free_resources(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 852a3e3fbb7..ae995ed0e6f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -635,17 +635,15 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev) /** * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread * @mdev: DRBD device. + * @thi: drbd_thread object * * call in the "main loop" of _all_ threads, no need for any mutex, current won't die * prematurely. */ -void drbd_thread_current_set_cpu(struct drbd_conf *mdev) +void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi) { struct task_struct *p = current; - struct drbd_thread *thi = drbd_task_to_thread(mdev, p); - if (!expect(thi != NULL)) - return; if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3a9cd31e094..dfb59671ff1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3773,7 +3773,7 @@ static void drbdd(struct drbd_conf *mdev) int rv; while (get_t_state(&mdev->tconn->receiver) == RUNNING) { - drbd_thread_current_set_cpu(mdev); + drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); if (!drbd_recv_header(mdev, &cmd, &packet_size)) goto err_out; @@ -4564,7 +4564,7 @@ int drbd_asender(struct drbd_thread *thi) current->rt_priority = 2; /* more important than all other tasks */ while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev); + drbd_thread_current_set_cpu(mdev, thi); if (test_and_clear_bit(SEND_PING, &mdev->flags)) { if (!drbd_send_ping(mdev)) { dev_err(DEV, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f13d56c2bf0..0dbd20ca630 
100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1621,7 +1621,7 @@ int drbd_worker(struct drbd_thread *thi) sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev); + drbd_thread_current_set_cpu(mdev, thi); if (down_trylock(&mdev->tconn->data.work.s)) { mutex_lock(&mdev->tconn->data.mutex); From 1f04af33fe7db542d75a487b8381b5a3402b7896 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 11:33:59 +0100 Subject: [PATCH 064/609] drbd: Moved code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 43 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0dbd20ca630..28925d3d1a2 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -707,28 +707,6 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca return 1; } - -void start_resync_timer_fn(unsigned long data) -{ - struct drbd_conf *mdev = (struct drbd_conf *) data; - - drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); -} - -int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -{ - if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { - dev_warn(DEV, "w_start_resync later...\n"); - mdev->start_resync_timer.expires = jiffies + HZ/10; - add_timer(&mdev->start_resync_timer); - return 1; - } - - drbd_start_resync(mdev, C_SYNC_SOURCE); - clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); - return 1; -} - int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { kfree(w); @@ -1462,6 +1440,27 @@ void drbd_rs_controller_reset(struct drbd_conf *mdev) spin_unlock(&mdev->peer_seq_lock); } +void start_resync_timer_fn(unsigned long data) +{ + struct drbd_conf *mdev = (struct 
drbd_conf *) data; + + drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); +} + +int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { + dev_warn(DEV, "w_start_resync later...\n"); + mdev->start_resync_timer.expires = jiffies + HZ/10; + add_timer(&mdev->start_resync_timer); + return 1; + } + + drbd_start_resync(mdev, C_SYNC_SOURCE); + clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); + return 1; +} + /** * drbd_start_resync() - Start the resync process * @mdev: DRBD device. From e64a32945902a178c9de9b38e0ea3290981605bc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Sat, 5 Feb 2011 17:34:11 +0100 Subject: [PATCH 065/609] drbd: Do not sleep long in drbd_start_resync Work items that sleep too long can cause requests to take as long as the longest sleeping work item. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_worker.c | 58 ++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9a351a2cab7..eec36af5674 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -787,6 +787,7 @@ enum { NEW_CUR_UUID, /* Create new current UUID when thawing IO */ AL_SUSPENDED, /* Activity logging is currently suspended.
*/ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ + B_RS_H_DONE, /* Before resync handler done (already executed) */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 28925d3d1a2..a705979c71f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1487,35 +1487,49 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) Ahead/Behind and SyncSource/SyncTarget */ } - if (side == C_SYNC_TARGET) { - /* Since application IO was locked out during C_WF_BITMAP_T and - C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET - we check that we might make the data inconsistent. */ - r = drbd_khelper(mdev, "before-resync-target"); - r = (r >> 8) & 0xff; - if (r > 0) { - dev_info(DEV, "before-resync-target handler returned %d, " - "dropping connection.\n", r); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return; - } - } else /* C_SYNC_SOURCE */ { - r = drbd_khelper(mdev, "before-resync-source"); - r = (r >> 8) & 0xff; - if (r > 0) { - if (r == 3) { - dev_info(DEV, "before-resync-source handler returned %d, " - "ignoring. Old userland tools?", r); - } else { - dev_info(DEV, "before-resync-source handler returned %d, " + if (!test_bit(B_RS_H_DONE, &mdev->flags)) { + if (side == C_SYNC_TARGET) { + /* Since application IO was locked out during C_WF_BITMAP_T and + C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET + we check that we might make the data inconsistent. 
*/ + r = drbd_khelper(mdev, "before-resync-target"); + r = (r >> 8) & 0xff; + if (r > 0) { + dev_info(DEV, "before-resync-target handler returned %d, " "dropping connection.\n", r); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return; } + } else /* C_SYNC_SOURCE */ { + r = drbd_khelper(mdev, "before-resync-source"); + r = (r >> 8) & 0xff; + if (r > 0) { + if (r == 3) { + dev_info(DEV, "before-resync-source handler returned %d, " + "ignoring. Old userland tools?", r); + } else { + dev_info(DEV, "before-resync-source handler returned %d, " + "dropping connection.\n", r); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return; + } + } } } - drbd_state_lock(mdev); + if (current == mdev->tconn->worker.task) { + /* The worker should not sleep waiting for drbd_state_lock(), + that can take long */ + if (test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + set_bit(B_RS_H_DONE, &mdev->flags); + mdev->start_resync_timer.expires = jiffies + HZ/5; + add_timer(&mdev->start_resync_timer); + return; + } + } else { + drbd_state_lock(mdev); + } + clear_bit(B_RS_H_DONE, &mdev->flags); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { drbd_state_unlock(mdev); From 8ea62f546487bc3f4e9a343ec82e5e03d9a3fe06 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Sat, 5 Feb 2011 17:56:05 +0100 Subject: [PATCH 066/609] drbd: Revert "Make sure we dont send state if a cluster wide state change is in progress" This reverts commit 6e9fdc92b77915d5c7ab8fea751f48378f8b0080. 
1) This did not fix the issue 2) Long sleeping work items can cause IO requests to take as long as the longest work item Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ae995ed0e6f..f33ca43659e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -951,10 +951,6 @@ int drbd_send_state(struct drbd_conf *mdev) struct p_state p; int ok = 0; - /* Grab state lock so we wont send state if we're in the middle - * of a cluster wide state change on another thread */ - drbd_state_lock(mdev); - mutex_lock(&mdev->tconn->data.mutex); p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ @@ -966,7 +962,6 @@ int drbd_send_state(struct drbd_conf *mdev) mutex_unlock(&mdev->tconn->data.mutex); - drbd_state_unlock(mdev); return ok; } From b53339fce2a692bf5f7cb583be2685444d52ded9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 Jan 2011 14:37:23 +0100 Subject: [PATCH 067/609] drbd: Moving state related macros to drbd_state.h Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 52 ----------------------------------- 1 file changed, 52 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eec36af5674..0afc83abc6f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1603,58 +1603,6 @@ void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *, const char *, const struct drbd_peer_request *); -/** - * DOC: DRBD State macros - * - * These macros are used to express state changes in easily readable form. - * - * The NS macros expand to a mask and a value, that can be bit ored onto the - * current state as soon as the spinlock (req_lock) was taken. - * - * The _NS macros are used for state functions that get called with the - * spinlock.
These macros expand directly to the new state value. - * - * Besides the basic forms NS() and _NS() additional _?NS[23] are defined - * to express state changes that affect more than one aspect of the state. - * - * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) - * Means that the network connection was established and that the peer - * is in secondary role. - */ -#define role_MASK R_MASK -#define peer_MASK R_MASK -#define disk_MASK D_MASK -#define pdsk_MASK D_MASK -#define conn_MASK C_MASK -#define susp_MASK 1 -#define user_isp_MASK 1 -#define aftr_isp_MASK 1 -#define susp_nod_MASK 1 -#define susp_fen_MASK 1 - -#define NS(T, S) \ - ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ - ({ union drbd_state val; val.i = 0; val.T = (S); val; }) -#define NS2(T1, S1, T2, S2) \ - ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ - mask.T2 = T2##_MASK; mask; }), \ - ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ - val.T2 = (S2); val; }) -#define NS3(T1, S1, T2, S2, T3, S3) \ - ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ - mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ - ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ - val.T2 = (S2); val.T3 = (S3); val; }) - -#define _NS(D, T, S) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) -#define _NS2(D, T1, S1, T2, S2) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ - __ns.T2 = (S2); __ns; }) -#define _NS3(D, T1, S1, T2, S2, T3, S3) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ - __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) - /* * inline helper functions *************************/ From 60ae496626ca62e82b23977ace2e96c4e152edd1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:01:51 +0100 Subject: [PATCH 068/609] drbd: conn_printk() a dev_printk() alike for drbd's connections Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 
++++++++++ drivers/block/drbd/drbd_main.c | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0afc83abc6f..c8a9b5003ae 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -102,6 +102,16 @@ struct drbd_tconn; /* to shorten dev_warn(DEV, "msg"); and relatives statements */ #define DEV (disk_to_dev(mdev->vdisk)) +#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \ + printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS) +#define conn_alert(TCONN, FMT, ARGS...) conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS) +#define conn_crit(TCONN, FMT, ARGS...) conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS) +#define conn_err(TCONN, FMT, ARGS...) conn_printk(KERN_ERR, TCONN, FMT, ## ARGS) +#define conn_warn(TCONN, FMT, ARGS...) conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS) +#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS) +#define conn_info(TCONN, FMT, ARGS...) conn_printk(KERN_INFO, TCONN, FMT, ## ARGS) +#define conn_dbg(TCONN, FMT, ARGS...) 
conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS) + #define D_ASSERT(exp) if (!(exp)) \ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f33ca43659e..541e35dbd6c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2217,12 +2217,14 @@ struct drbd_conf *drbd_new_device(unsigned int minor) struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; + char conn_name[9]; /* drbd1234N */ /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) return NULL; - mdev->tconn = drbd_new_tconn("dummy"); + sprintf(conn_name, "drbd%d", minor); + mdev->tconn = drbd_new_tconn(conn_name); if (!mdev->tconn) goto out_no_tconn; From eac3e990e40616da1dc52212bc0631f2d029b026 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:05:07 +0100 Subject: [PATCH 069/609] drbd: Converted drbd_try_connect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dfb59671ff1..2c5ca8c3029 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -572,7 +572,7 @@ static void drbd_setbufsize(struct socket *sock, unsigned int snd, } } -static struct socket *drbd_try_connect(struct drbd_conf *mdev) +static struct socket *drbd_try_connect(struct drbd_tconn *tconn) { const char *what; struct socket *sock; @@ -580,11 +580,11 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) int err; int disconnect_on_error = 1; - if (!get_net_conf(mdev->tconn)) + if (!get_net_conf(tconn)) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, + err 
= sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -592,9 +592,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = mdev->tconn->net_conf->try_connect_int*HZ; - drbd_setbufsize(sock, mdev->tconn->net_conf->sndbuf_size, - mdev->tconn->net_conf->rcvbuf_size); + sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ; + drbd_setbufsize(sock, tconn->net_conf->sndbuf_size, + tconn->net_conf->rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -603,9 +603,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) * Make sure to use 0 as port number, so linux selects * a free one dynamically. */ - memcpy(&src_in6, mdev->tconn->net_conf->my_addr, - min_t(int, mdev->tconn->net_conf->my_addr_len, sizeof(src_in6))); - if (((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family == AF_INET6) + memcpy(&src_in6, tconn->net_conf->my_addr, + min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6))); + if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ @@ -613,7 +613,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) what = "bind before connect"; err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, - mdev->tconn->net_conf->my_addr_len); + tconn->net_conf->my_addr_len); if (err < 0) goto out; @@ -622,8 +622,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) disconnect_on_error = 0; what = "connect"; err = sock->ops->connect(sock, - (struct sockaddr *)mdev->tconn->net_conf->peer_addr, - mdev->tconn->net_conf->peer_addr_len, 0); + (struct sockaddr *)tconn->net_conf->peer_addr, + tconn->net_conf->peer_addr_len, 0); out: if (err < 0) { @@ -641,12 +641,12 @@ out: disconnect_on_error = 0; break; default: - 
dev_err(DEV, "%s failed, err = %d\n", what, err); + conn_err(tconn, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } - put_net_conf(mdev->tconn); + put_net_conf(tconn); return sock; } @@ -774,7 +774,7 @@ static int drbd_connect(struct drbd_conf *mdev) do { for (try = 0;;) { /* 3 tries, this should take less than a second! */ - s = drbd_try_connect(mdev); + s = drbd_try_connect(mdev->tconn); if (s || ++try >= 3) break; /* give the other side time to call bind() & listen() */ From 7653620de341f45dc259d74d79c8d85df7e11326 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:09:54 +0100 Subject: [PATCH 070/609] drbd: Converted drbd_wait_for_connect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2c5ca8c3029..2e58d00742d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -447,8 +447,7 @@ void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) /* see also kernel_accept; which is only present since 2.6.18. 
* also we want to log which part of it failed, exactly */ -static int drbd_accept(struct drbd_conf *mdev, const char **what, - struct socket *sock, struct socket **newsock) +static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock) { struct sock *sk = sock->sk; int err = 0; @@ -650,51 +649,51 @@ out: return sock; } -static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) +static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) { int timeo, err; struct socket *s_estab = NULL, *s_listen; const char *what; - if (!get_net_conf(mdev->tconn)) + if (!get_net_conf(tconn)) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; goto out; } - timeo = mdev->tconn->net_conf->try_connect_int * HZ; + timeo = tconn->net_conf->try_connect_int * HZ; timeo += (random32() & 1) ? 
timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; - drbd_setbufsize(s_listen, mdev->tconn->net_conf->sndbuf_size, - mdev->tconn->net_conf->rcvbuf_size); + drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size, + tconn->net_conf->rcvbuf_size); what = "bind before listen"; err = s_listen->ops->bind(s_listen, - (struct sockaddr *) mdev->tconn->net_conf->my_addr, - mdev->tconn->net_conf->my_addr_len); + (struct sockaddr *) tconn->net_conf->my_addr, + tconn->net_conf->my_addr_len); if (err < 0) goto out; - err = drbd_accept(mdev, &what, s_listen, &s_estab); + err = drbd_accept(&what, s_listen, &s_estab); out: if (s_listen) sock_release(s_listen); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { - dev_err(DEV, "%s failed, err = %d\n", what, err); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_err(tconn, "%s failed, err = %d\n", what, err); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } } - put_net_conf(mdev->tconn); + put_net_conf(tconn); return s_estab; } @@ -805,7 +804,7 @@ static int drbd_connect(struct drbd_conf *mdev) } retry: - s = drbd_wait_for_connect(mdev); + s = drbd_wait_for_connect(mdev->tconn); if (s) { try = drbd_recv_fp(mdev, s); drbd_socket_okay(mdev, &sock); From 01a311a589709d83a1f2b7d2587e171d97f12017 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:30:33 +0100 Subject: [PATCH 071/609] drbd: Started to separate connection flags (tconn) from block device flags (mdev) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +++++++--- drivers/block/drbd/drbd_main.c | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c8a9b5003ae..f46571acd4d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@
-754,7 +754,7 @@ enum { #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) -/* global flag bits */ +/* flag bits per mdev */ enum { CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ @@ -782,8 +782,6 @@ enum { GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ WAS_IO_ERROR, /* Local disk failed returned IO error */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ - NET_CONGESTED, /* The data socket is congested */ - CONFIG_PENDING, /* serialization of (re)configuration requests. * if set, also prevents the device from dying */ DEVICE_DYING, /* device became unconfigured, @@ -910,10 +908,16 @@ struct fifo_buffer { unsigned int size; }; +/* flag bits per tconn */ +enum { + NET_CONGESTED, /* The data socket is congested */ +}; + struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ struct drbd_conf *volume0; /* TODO: Remove me again */ + unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 541e35dbd6c..8b443c8b13b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1358,7 +1358,7 @@ static void drbd_update_congested(struct drbd_conf *mdev) { struct sock *sk = mdev->tconn->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->flags); + set_bit(NET_CONGESTED, &mdev->tconn->flags); } /* The idea of sendpage seems to be to put some kind of reference @@ -1431,7 +1431,7 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, offset += sent; } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/); set_fs(oldfs); - 
clear_bit(NET_CONGESTED, &mdev->flags); + clear_bit(NET_CONGESTED, &mdev->tconn->flags); ok = (len == 0); if (likely(ok)) @@ -1694,7 +1694,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, } while (sent < size); if (sock == mdev->tconn->data.socket) - clear_bit(NET_CONGESTED, &mdev->flags); + clear_bit(NET_CONGESTED, &mdev->tconn->flags); if (rv <= 0) { if (rv != -EAGAIN) { @@ -2161,7 +2161,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) reason = 'b'; } - if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) { + if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) { r |= (1 << BDI_async_congested); reason = reason == 'b' ? 'a' : 'n'; } From 25703f832000393721641772a827469d46b1105b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:35:25 +0100 Subject: [PATCH 072/609] drbd: Moved DISCARD_CONCURRENT to the per connection (tconn) flags Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f46571acd4d..2b8566362b7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -763,7 +763,6 @@ enum { UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ - DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ CL_ST_CHG_SUCCESS, @@ -911,6 +910,7 @@ struct fifo_buffer { /* flag bits per tconn */ enum { NET_CONGESTED, /* The data socket is congested */ + DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! 
*/ }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e30d52ba3fc..fda399ace8d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1637,7 +1637,7 @@ void resync_after_online_grow(struct drbd_conf *mdev) if (mdev->state.role != mdev->state.peer) iass = (mdev->state.role == R_PRIMARY); else - iass = test_bit(DISCARD_CONCURRENT, &mdev->flags); + iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); if (iass) drbd_start_resync(mdev, C_SYNC_SOURCE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2e58d00742d..e2eed149bb9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -762,7 +762,7 @@ static int drbd_connect(struct drbd_conf *mdev) if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; - clear_bit(DISCARD_CONCURRENT, &mdev->flags); + clear_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); mdev->tconn->agreed_pro_version = 99; /* agreed_pro_version must be smaller than 100 so we send the old header (h80) in the first packet and in the handshake packet. 
*/ @@ -823,7 +823,7 @@ retry: sock_release(msock); } msock = s; - set_bit(DISCARD_CONCURRENT, &mdev->flags); + set_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); break; default: dev_warn(DEV, "Error receiving initial packet\n"); @@ -1779,7 +1779,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ const int size = peer_req->i.size; - const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); + const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); DEFINE_WAIT(wait); int first; @@ -2239,7 +2239,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) "Using discard-least-changes instead\n"); case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) ? -1 : 1; break; } else { @@ -2255,7 +2255,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) rv = 1; else /* ( ch_self == ch_peer ) */ /* Well, then use something else. */ - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) ? -1 : 1; break; case ASB_DISCARD_LOCAL: @@ -2468,7 +2468,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ - dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); + dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); return dc ? 
-1 : 1; } } @@ -3209,7 +3209,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && + if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return true; From e43ef195f8bc828cac931a58d8c308228c51b7cf Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:40:40 +0100 Subject: [PATCH 073/609] drbd: Moved SEND_PING to the per connection (tconn) flags Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_receiver.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2b8566362b7..227c0956e80 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -758,7 +758,6 @@ enum { enum { CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ - SEND_PING, /* whether asender should send a ping asap */ UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ @@ -911,6 +910,7 @@ struct fifo_buffer { enum { NET_CONGESTED, /* The data socket is congested */ DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! 
*/ + SEND_PING, /* whether asender should send a ping asap */ }; struct drbd_tconn { /* is a resource from the config file */ @@ -1867,7 +1867,7 @@ static inline void wake_asender(struct drbd_conf *mdev) static inline void request_ping(struct drbd_conf *mdev) { - set_bit(SEND_PING, &mdev->flags); + set_bit(SEND_PING, &mdev->tconn->flags); wake_asender(mdev); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e2eed149bb9..54bf7a5c225 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4564,7 +4564,7 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev, thi); - if (test_and_clear_bit(SEND_PING, &mdev->flags)) { + if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) { if (!drbd_send_ping(mdev)) { dev_err(DEV, "drbd_send_ping has failed\n"); goto reconnect; @@ -4635,7 +4635,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "PingAck did not arrive in time.\n"); goto reconnect; } - set_bit(SEND_PING, &mdev->flags); + set_bit(SEND_PING, &mdev->tconn->flags); continue; } else if (rv == -EINTR) { continue; From 808e37b803958e09494e0c7de492386845060057 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:44:14 +0100 Subject: [PATCH 074/609] drbd: Moved SIGNAL_ASENDER to the per connection (tconn) flags Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 ++--- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 227c0956e80..33882c82b1a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -757,8 +757,6 @@ enum { /* flag bits per mdev */ enum { CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ - SIGNAL_ASENDER, /* whether asender wants to be interrupted */ - UNPLUG_QUEUED, /* only relevant with 
kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ @@ -911,6 +909,7 @@ enum { NET_CONGESTED, /* The data socket is congested */ DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ SEND_PING, /* whether asender should send a ping asap */ + SIGNAL_ASENDER, /* whether asender wants to be interrupted */ }; struct drbd_tconn { /* is a resource from the config file */ @@ -1861,7 +1860,7 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) static inline void wake_asender(struct drbd_conf *mdev) { - if (test_bit(SIGNAL_ASENDER, &mdev->flags)) + if (test_bit(SIGNAL_ASENDER, &mdev->tconn->flags)) force_sig(DRBD_SIG, mdev->tconn->asender.task); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 54bf7a5c225..b4c357e4ad8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4580,12 +4580,12 @@ int drbd_asender(struct drbd_thread *thi) 3 < atomic_read(&mdev->unacked_cnt)) drbd_tcp_cork(mdev->tconn->meta.socket); while (1) { - clear_bit(SIGNAL_ASENDER, &mdev->flags); + clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); flush_signals(current); if (!drbd_process_done_ee(mdev)) goto reconnect; /* to avoid race with newly queued ACKs */ - set_bit(SIGNAL_ASENDER, &mdev->flags); + set_bit(SIGNAL_ASENDER, &mdev->tconn->flags); spin_lock_irq(&mdev->tconn->req_lock); empty = list_empty(&mdev->done_ee); spin_unlock_irq(&mdev->tconn->req_lock); @@ -4605,7 +4605,7 @@ int drbd_asender(struct drbd_thread *thi) rv = drbd_recv_short(mdev, mdev->tconn->meta.socket, buf, expect-received, 0); - clear_bit(SIGNAL_ASENDER, &mdev->flags); + clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); flush_signals(current); @@ -4688,7 +4688,7 @@ disconnect: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); drbd_md_sync(mdev); } - clear_bit(SIGNAL_ASENDER, &mdev->flags); + clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); 
D_ASSERT(mdev->state.conn < C_CONNECTED); dev_info(DEV, "asender terminated\n"); From 0625ac190d222fd0855bad79e93f1556fc45dd20 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:49:19 +0100 Subject: [PATCH 075/609] drbd: Converted wake_asender() and request_ping() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 12 ++++++------ drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 33882c82b1a..0b2962c623a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1858,16 +1858,16 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) spin_unlock_irqrestore(&q->q_lock, flags); } -static inline void wake_asender(struct drbd_conf *mdev) +static inline void wake_asender(struct drbd_tconn *tconn) { - if (test_bit(SIGNAL_ASENDER, &mdev->tconn->flags)) - force_sig(DRBD_SIG, mdev->tconn->asender.task); + if (test_bit(SIGNAL_ASENDER, &tconn->flags)) + force_sig(DRBD_SIG, tconn->asender.task); } -static inline void request_ping(struct drbd_conf *mdev) +static inline void request_ping(struct drbd_tconn *tconn) { - set_bit(SEND_PING, &mdev->tconn->flags); - wake_asender(mdev); + set_bit(SEND_PING, &tconn->flags); + wake_asender(tconn); } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8b443c8b13b..899bbb1c986 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1348,7 +1348,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * if (!drop_it) { dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", current->comm, current->pid, mdev->tconn->ko_count); - 
request_ping(mdev); + request_ping(mdev->tconn); } return drop_it; /* && (mdev->state == R_PRIMARY) */; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fda399ace8d..df36a573cd4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -318,7 +318,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) enum drbd_disk_state nps; if (new_role == R_PRIMARY) - request_ping(mdev); /* Detect a dead peer ASAP */ + request_ping(mdev->tconn); /* Detect a dead peer ASAP */ mutex_lock(&mdev->state_mutex); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b4c357e4ad8..a5234f99de0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1864,7 +1864,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, * but I don't like the receiver using the msock */ put_ldev(mdev); - wake_asender(mdev); + wake_asender(mdev->tconn); finish_wait(&mdev->misc_wait, &wait); return true; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a705979c71f..5be179ba0c7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -145,7 +145,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel if (do_al_complete_io) drbd_al_complete_io(mdev, e_sector); - wake_asender(mdev); + wake_asender(mdev->tconn); put_ldev(mdev); } @@ -728,7 +728,7 @@ static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int ca static void ping_peer(struct drbd_conf *mdev) { clear_bit(GOT_PING_ACK, &mdev->flags); - request_ping(mdev); + request_ping(mdev->tconn); wait_event(mdev->misc_wait, test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); } From 1a7ba646e966500d60578aa7406c158c8cca51d4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:56:02 +0100 Subject: [PATCH 076/609] drbd: Converted helper functions 
for drbd_send() to tconn * drbd_update_congested() * we_should_drop_the_connection() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 899bbb1c986..be4cb1ac2e9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1331,34 +1331,34 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) * returns false if we should retry, * true if we think connection is dead */ -static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) +static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock) { int drop_it; /* long elapsed = (long)(jiffies - mdev->last_received); */ - drop_it = mdev->tconn->meta.socket == sock - || !mdev->tconn->asender.task - || get_t_state(&mdev->tconn->asender) != RUNNING - || mdev->state.conn < C_CONNECTED; + drop_it = tconn->meta.socket == sock + || !tconn->asender.task + || get_t_state(&tconn->asender) != RUNNING + || tconn->volume0->state.conn < C_CONNECTED; if (drop_it) return true; - drop_it = !--mdev->tconn->ko_count; + drop_it = !--tconn->ko_count; if (!drop_it) { - dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", - current->comm, current->pid, mdev->tconn->ko_count); - request_ping(mdev->tconn); + conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n", + current->comm, current->pid, tconn->ko_count); + request_ping(tconn); } return drop_it; /* && (mdev->state == R_PRIMARY) */; } -static void drbd_update_congested(struct drbd_conf *mdev) +static void drbd_update_congested(struct drbd_tconn *tconn) { - struct sock *sk = mdev->tconn->data.socket->sk; + struct sock *sk = tconn->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->tconn->flags); + set_bit(NET_CONGESTED, 
&tconn->flags); } /* The idea of sendpage seems to be to put some kind of reference @@ -1409,14 +1409,14 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, return _drbd_no_send_page(mdev, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; - drbd_update_congested(mdev); + drbd_update_congested(mdev->tconn); set_fs(KERNEL_DS); do { sent = mdev->tconn->data.socket->ops->sendpage(mdev->tconn->data.socket, page, offset, len, msg_flags); if (sent == -EAGAIN) { - if (we_should_drop_the_connection(mdev, + if (we_should_drop_the_connection(mdev->tconn, mdev->tconn->data.socket)) break; else @@ -1662,7 +1662,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, if (sock == mdev->tconn->data.socket) { mdev->tconn->ko_count = mdev->tconn->net_conf->ko_count; - drbd_update_congested(mdev); + drbd_update_congested(mdev->tconn); } do { /* STRANGE @@ -1676,7 +1676,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, */ rv = kernel_sendmsg(sock, &msg, &iov, 1, size); if (rv == -EAGAIN) { - if (we_should_drop_the_connection(mdev, sock)) + if (we_should_drop_the_connection(mdev->tconn, sock)) break; else continue; From bedbd2a53a0bcb5715b4d1f59ec8af045092a167 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:08:48 +0100 Subject: [PATCH 077/609] drbd: Converted drbd_send() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +-- drivers/block/drbd/drbd_main.c | 45 +++++++++++++++++----------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0b2962c623a..ccbb0320a2c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1159,8 +1159,8 @@ extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, extern void tl_clear(struct drbd_conf *mdev); extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern 
void drbd_free_sock(struct drbd_conf *mdev); -extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, - void *buf, size_t size, unsigned msg_flags); +extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, + void *buf, size_t size, unsigned msg_flags); extern int drbd_send_protocol(struct drbd_conf *mdev); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index be4cb1ac2e9..d1bfbfcf8f2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -690,7 +690,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); - sent = drbd_send(mdev, sock, h, size, msg_flags); + sent = drbd_send(mdev->tconn, sock, h, size, msg_flags); ok = (sent == size); if (!ok && !signal_pending(current)) @@ -740,9 +740,9 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, char *data, return 0; ok = (sizeof(h) == - drbd_send(mdev, mdev->tconn->data.socket, &h, sizeof(h), 0)); + drbd_send(mdev->tconn, mdev->tconn->data.socket, &h, sizeof(h), 0)); ok = ok && (size == - drbd_send(mdev, mdev->tconn->data.socket, data, size, 0)); + drbd_send(mdev->tconn, mdev->tconn->data.socket, data, size, 0)); drbd_put_data_sock(mdev); @@ -1306,8 +1306,8 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, mutex_lock(&mdev->tconn->data.mutex); - ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); - ok = ok && (digest_size == drbd_send(mdev, mdev->tconn->data.socket, digest, digest_size, 0)); + ok = (sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), 0)); + ok = ok && (digest_size == drbd_send(mdev->tconn, mdev->tconn->data.socket, digest, digest_size, 0)); mutex_unlock(&mdev->tconn->data.mutex); @@ -1385,7 +1385,7 @@ static void 
drbd_update_congested(struct drbd_tconn *tconn) static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags); + int sent = drbd_send(mdev->tconn, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags); kunmap(page); if (sent == size) mdev->send_cnt += size>>9; @@ -1526,11 +1526,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == - drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); + drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { dgb = mdev->tconn->int_dig_out; drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb); - ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) { /* For protocol A, we have to memcpy the payload into @@ -1599,11 +1599,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, if (!drbd_get_data_sock(mdev)) return 0; - ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); + ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->tconn->int_dig_out; drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb); - ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) ok = _drbd_send_zc_ee(mdev, peer_req); @@ -1639,7 +1639,7 @@ int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) /* * you must have down()ed the appropriate [m]sock_mutex elsewhere! 
*/ -int drbd_send(struct drbd_conf *mdev, struct socket *sock, +int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags) { struct kvec iov; @@ -1660,9 +1660,9 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; - if (sock == mdev->tconn->data.socket) { - mdev->tconn->ko_count = mdev->tconn->net_conf->ko_count; - drbd_update_congested(mdev->tconn); + if (sock == tconn->data.socket) { + tconn->ko_count = tconn->net_conf->ko_count; + drbd_update_congested(tconn); } do { /* STRANGE @@ -1676,12 +1676,11 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, */ rv = kernel_sendmsg(sock, &msg, &iov, 1, size); if (rv == -EAGAIN) { - if (we_should_drop_the_connection(mdev->tconn, sock)) + if (we_should_drop_the_connection(tconn, sock)) break; else continue; } - D_ASSERT(rv != 0); if (rv == -EINTR) { flush_signals(current); rv = 0; @@ -1693,17 +1692,17 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, iov.iov_len -= rv; } while (sent < size); - if (sock == mdev->tconn->data.socket) - clear_bit(NET_CONGESTED, &mdev->tconn->flags); + if (sock == tconn->data.socket) + clear_bit(NET_CONGESTED, &tconn->flags); if (rv <= 0) { if (rv != -EAGAIN) { - dev_err(DEV, "%s_sendmsg returned %d\n", - sock == mdev->tconn->meta.socket ? "msock" : "sock", - rv); - drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + conn_err(tconn, "%s_sendmsg returned %d\n", + sock == tconn->meta.socket ? 
"msock" : "sock", + rv); + drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); } else - drbd_force_state(mdev, NS(conn, C_TIMEOUT)); + drbd_force_state(tconn->volume0, NS(conn, C_TIMEOUT)); } return sent; From d38e787eccb7afd069e33b2f4a32e02e0ad8decb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:32:04 +0100 Subject: [PATCH 078/609] drbd: Converted drbd_send_fp() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ++++++++--- drivers/block/drbd/drbd_main.c | 35 +++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 14 +++++------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ccbb0320a2c..e640ffdad9c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1168,9 +1168,9 @@ extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); -extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packet cmd, struct p_header *h, - size_t size, unsigned msg_flags); +extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, + enum drbd_packet cmd, struct p_header *h, size_t size, + unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, @@ -1870,6 +1870,13 @@ static inline void request_ping(struct drbd_tconn *tconn) wake_asender(tconn); } +static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, + enum drbd_packet cmd, struct p_header *h, size_t size, + unsigned msg_flags) +{ + return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); +} + static inline int 
drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packet cmd) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d1bfbfcf8f2..2a67e272b16 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -651,51 +651,50 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi } #endif -static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, - enum drbd_packet cmd, int size) +static void prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) { h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size); } -static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, - enum drbd_packet cmd, int size) +static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size) { h->magic = cpu_to_be16(DRBD_MAGIC_BIG); h->command = cpu_to_be16(cmd); h->length = cpu_to_be32(size); } +static void _prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h, + enum drbd_packet cmd, int size) +{ + if (tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + prepare_header95(&h->h95, cmd, size); + else + prepare_header80(&h->h80, cmd, size); +} + static void prepare_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_packet cmd, int size) { - if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) - prepare_header95(mdev, &h->h95, cmd, size); - else - prepare_header80(mdev, &h->h80, cmd, size); + _prepare_header(mdev->tconn, mdev->vnr, h, cmd, size); } /* the appropriate socket mutex must be held already */ -int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, +int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags) { int sent, ok; - if (!expect(h)) - return false; - if (!expect(size)) - return false; + _prepare_header(tconn, vnr, h, cmd, size 
- sizeof(struct p_header)); - prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); - - sent = drbd_send(mdev->tconn, sock, h, size, msg_flags); + sent = drbd_send(tconn, sock, h, size, msg_flags); ok = (sent == size); if (!ok && !signal_pending(current)) - dev_warn(DEV, "short sent %s size=%d sent=%d\n", - cmdname(cmd), (int)size, sent); + conn_warn(tconn, "short sent %s size=%d sent=%d\n", + cmdname(cmd), (int)size, sent); return ok; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a5234f99de0..96622d7eadf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -698,16 +698,14 @@ out: return s_estab; } -static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packet cmd) +static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd) { - struct p_header *h = &mdev->tconn->data.sbuf.header; + struct p_header *h = &tconn->data.sbuf.header; - return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); + return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); } -static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, - struct socket *sock) +static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; @@ -782,11 +780,11 @@ static int drbd_connect(struct drbd_conf *mdev) if (s) { if (!sock) { - drbd_send_fp(mdev, s, P_HAND_SHAKE_S); + drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_S); sock = s; s = NULL; } else if (!msock) { - drbd_send_fp(mdev, s, P_HAND_SHAKE_M); + drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_M); msock = s; s = NULL; } else { From dbd9eea094964e31c718ad8ade7c296d9e9da758 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:34:16 +0100 Subject: [PATCH 079/609] drbd: Removed unused mdev argument from drbd_recv_short() and drbd_socket_okay() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- 
drivers/block/drbd/drbd_receiver.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 96622d7eadf..8d048df04a3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -476,8 +476,7 @@ out: return err; } -static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, - void *buf, size_t size, int flags) +static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) { mm_segment_t oldfs; struct kvec iov = { @@ -710,7 +709,7 @@ static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; - rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); + rr = drbd_recv_short(sock, h, sizeof(*h), 0); if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC)) return be16_to_cpu(h->command); @@ -720,10 +719,9 @@ static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock /** * drbd_socket_okay() - Free the socket if its connection is not okay - * @mdev: DRBD device. * @sock: pointer to the pointer to the socket. 
*/ -static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) +static int drbd_socket_okay(struct socket **sock) { int rr; char tb[4]; @@ -731,7 +729,7 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) if (!*sock) return false; - rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); + rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); if (rr > 0 || rr == -EAGAIN) { return true; @@ -795,8 +793,8 @@ static int drbd_connect(struct drbd_conf *mdev) if (sock && msock) { schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10); - ok = drbd_socket_okay(mdev, &sock); - ok = drbd_socket_okay(mdev, &msock) && ok; + ok = drbd_socket_okay(&sock); + ok = drbd_socket_okay(&msock) && ok; if (ok) break; } @@ -805,8 +803,8 @@ retry: s = drbd_wait_for_connect(mdev->tconn); if (s) { try = drbd_recv_fp(mdev, s); - drbd_socket_okay(mdev, &sock); - drbd_socket_okay(mdev, &msock); + drbd_socket_okay(&sock); + drbd_socket_okay(&msock); switch (try) { case P_HAND_SHAKE_S: if (sock) { @@ -841,8 +839,8 @@ retry: } if (sock && msock) { - ok = drbd_socket_okay(mdev, &sock); - ok = drbd_socket_okay(mdev, &msock) && ok; + ok = drbd_socket_okay(&sock); + ok = drbd_socket_okay(&msock) && ok; if (ok) break; } @@ -4601,8 +4599,7 @@ int drbd_asender(struct drbd_thread *thi) if (signal_pending(current)) continue; - rv = drbd_recv_short(mdev, mdev->tconn->meta.socket, - buf, expect-received, 0); + rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0); clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); flush_signals(current); From a25b63f1e75df7dbc27666b627e6277d7fea92b7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:43:45 +0100 Subject: [PATCH 080/609] drbd: Converted drbd_recv_fp() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8d048df04a3..60a4f651a08 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -704,9 +704,9 @@ static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); } -static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock) { - struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; + struct p_header80 *h = &tconn->data.rbuf.header.h80; int rr; rr = drbd_recv_short(sock, h, sizeof(*h), 0); @@ -802,7 +802,7 @@ static int drbd_connect(struct drbd_conf *mdev) retry: s = drbd_wait_for_connect(mdev->tconn); if (s) { - try = drbd_recv_fp(mdev, s); + try = drbd_recv_fp(mdev->tconn, s); drbd_socket_okay(&sock); drbd_socket_okay(&msock); switch (try) { From 8a22cccc2068b35124f340fcc3f38b730007deff Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 16:47:12 +0100 Subject: [PATCH 081/609] drbd: Converted drbd_send_handshake() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e640ffdad9c..845ff34d206 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -378,7 +378,7 @@ struct p_block_req { */ struct p_handshake { - struct p_header head; /* Note: You must always use a h80 here */ + struct p_header head; /* Note: vnr will be ignored */ u32 protocol_min; u32 feature_flags; u32 protocol_max; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 60a4f651a08..565f2ea47ab 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ 
b/drivers/block/drbd/drbd_receiver.c @@ -3952,28 +3952,28 @@ static void drbd_disconnect(struct drbd_conf *mdev) * * for now, they are expected to be zero, but ignored. */ -static int drbd_send_handshake(struct drbd_conf *mdev) +static int drbd_send_handshake(struct drbd_tconn *tconn) { /* ASSERT current == mdev->tconn->receiver ... */ - struct p_handshake *p = &mdev->tconn->data.sbuf.handshake; + struct p_handshake *p = &tconn->data.sbuf.handshake; int ok; - if (mutex_lock_interruptible(&mdev->tconn->data.mutex)) { - dev_err(DEV, "interrupted during initial handshake\n"); + if (mutex_lock_interruptible(&tconn->data.mutex)) { + conn_err(tconn, "interrupted during initial handshake\n"); return 0; /* interrupted. not ok. */ } - if (mdev->tconn->data.socket == NULL) { - mutex_unlock(&mdev->tconn->data.mutex); + if (tconn->data.socket == NULL) { + mutex_unlock(&tconn->data.mutex); return 0; } memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_HAND_SHAKE, - &p->head, sizeof(*p), 0 ); - mutex_unlock(&mdev->tconn->data.mutex); + ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE, + &p->head, sizeof(*p), 0); + mutex_unlock(&tconn->data.mutex); return ok; } @@ -3993,7 +3993,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) enum drbd_packet cmd; int rv; - rv = drbd_send_handshake(mdev); + rv = drbd_send_handshake(mdev->tconn); if (!rv) return 0; From de0ff338d61645f39e0687c9c3560d8b64bed4a3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 16:56:20 +0100 Subject: [PATCH 082/609] drbd: Converted drbd_recv() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 46 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 
565f2ea47ab..1368fc3518d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -498,7 +498,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag return rv; } -static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) +static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) { mm_segment_t oldfs; struct kvec iov = { @@ -516,7 +516,7 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(KERNEL_DS); for (;;) { - rv = sock_recvmsg(mdev->tconn->data.socket, &msg, size, msg.msg_flags); + rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags); if (rv == size) break; @@ -527,12 +527,12 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) if (rv < 0) { if (rv == -ECONNRESET) - dev_info(DEV, "sock was reset by peer\n"); + conn_info(tconn, "sock was reset by peer\n"); else if (rv != -ERESTARTSYS) - dev_err(DEV, "sock_recvmsg returned %d\n", rv); + conn_err(tconn, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - dev_info(DEV, "sock was shut down by peer\n"); + conn_info(tconn, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -546,7 +546,7 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(oldfs); if (rv != size) - drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); return rv; } @@ -949,7 +949,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, struct p_header *h = &mdev->tconn->data.rbuf.header; int r; - r = drbd_recv(mdev, h, sizeof(*h)); + r = drbd_recv(mdev->tconn, h, sizeof(*h)); if (unlikely(r != sizeof(*h))) { if (!signal_pending(current)) dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); @@ -1272,7 +1272,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if 
(dgs) { - rr = drbd_recv(mdev, dig_in, dgs); + rr = drbd_recv(mdev->tconn, dig_in, dgs); if (rr != dgs) { if (!signal_pending(current)) dev_warn(DEV, @@ -1313,7 +1313,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, page_chain_for_each(page) { unsigned len = min_t(int, ds, PAGE_SIZE); data = kmap(page); - rr = drbd_recv(mdev, data, len); + rr = drbd_recv(mdev->tconn, data, len); if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { dev_err(DEV, "Fault injection: Corrupting data on receive\n"); data[0] = data[0] ^ (unsigned long)-1; @@ -1360,7 +1360,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) data = kmap(page); while (data_size) { - rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); + rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE)); if (rr != min_t(int, data_size, PAGE_SIZE)) { rv = 0; if (!signal_pending(current)) @@ -1389,7 +1389,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { - rr = drbd_recv(mdev, dig_in, dgs); + rr = drbd_recv(mdev->tconn, dig_in, dgs); if (rr != dgs) { if (!signal_pending(current)) dev_warn(DEV, @@ -1410,7 +1410,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, bio_for_each_segment(bvec, bio, i) { expect = min_t(int, data_size, bvec->bv_len); - rr = drbd_recv(mdev, + rr = drbd_recv(mdev->tconn, kmap(bvec->bv_page)+bvec->bv_offset, expect); kunmap(bvec->bv_page); @@ -2094,7 +2094,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, peer_req->digest = di; peer_req->flags |= EE_HAS_DIGEST; - if (drbd_recv(mdev, di->digest, digest_size) != digest_size) + if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size) goto out_free_e; if (cmd == P_CSUM_RS_REQUEST) { @@ -2785,7 +2785,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, if (mdev->tconn->agreed_pro_version >= 87) { 
unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg; - if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) + if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size) return false; p_integrity_alg[SHARED_SECRET_MAX-1] = 0; @@ -2871,7 +2871,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) + if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size) return false; mdev->sync_conf.rate = be32_to_cpu(p->rate); @@ -2885,7 +2885,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, return false; } - if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) + if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size) return false; /* we expect NUL terminated string */ @@ -3424,7 +3424,7 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, } if (want == 0) return 0; - err = drbd_recv(mdev, buffer, want); + err = drbd_recv(mdev->tconn, buffer, want); if (err != want) { if (err >= 0) err = -EIO; @@ -3613,7 +3613,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, /* use the page buff */ p = buffer; memcpy(p, h, sizeof(*h)); - if (drbd_recv(mdev, p->head.payload, data_size) != data_size) + if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size) goto out; if (data_size <= (sizeof(*p) - sizeof(p->head))) { dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); @@ -3677,7 +3677,7 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, size = data_size; while (size > 0) { want = min_t(int, size, sizeof(sink)); - r = drbd_recv(mdev, sink, want); + r = drbd_recv(mdev->tconn, sink, want); if (!expect(r > 0)) break; size -= r; @@ -3784,7 +3784,7 @@ static void drbdd(struct drbd_conf *mdev) } if (shs) { - rv = drbd_recv(mdev, 
&header->payload, shs); + rv = drbd_recv(mdev->tconn, &header->payload, shs); if (unlikely(rv != shs)) { if (!signal_pending(current)) dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); @@ -4013,7 +4013,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) return -1; } - rv = drbd_recv(mdev, &p->head.payload, expect); + rv = drbd_recv(mdev->tconn, &p->head.payload, expect); if (rv != expect) { if (!signal_pending(current)) @@ -4116,7 +4116,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - rv = drbd_recv(mdev, peers_ch, length); + rv = drbd_recv(mdev->tconn, peers_ch, length); if (rv != length) { if (!signal_pending(current)) @@ -4164,7 +4164,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - rv = drbd_recv(mdev, response , resp_size); + rv = drbd_recv(mdev->tconn, response , resp_size); if (rv != resp_size) { if (!signal_pending(current)) From 77351055b5244a3131bd8564dccc8bd95a995317 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:24:26 +0100 Subject: [PATCH 083/609] drbd: struct packet_info to hold information of decoded packets Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 103 +++++++++++++++-------------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1368fc3518d..380d24e8434 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -48,6 +48,12 @@ #include "drbd_vli.h" +struct packet_info { + enum drbd_packet cmd; + int size; + int vnr; +}; + enum finish_epoch { FE_STILL_LIVE, FE_DESTROYED, @@ -924,15 +930,15 @@ out_release_sockets: return -1; } -static bool decode_header(struct drbd_conf *mdev, struct p_header *h, - enum drbd_packet *cmd, unsigned int *packet_size) +static bool decode_header(struct drbd_conf *mdev, struct p_header *h, struct packet_info *pi) { if (h->h80.magic == 
cpu_to_be32(DRBD_MAGIC)) { - *cmd = be16_to_cpu(h->h80.command); - *packet_size = be16_to_cpu(h->h80.length); + pi->cmd = be16_to_cpu(h->h80.command); + pi->size = be16_to_cpu(h->h80.length); } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { - *cmd = be16_to_cpu(h->h95.command); - *packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff; + pi->cmd = be16_to_cpu(h->h95.command); + pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff; + pi->vnr = 0; } else { dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), @@ -943,8 +949,7 @@ static bool decode_header(struct drbd_conf *mdev, struct p_header *h, return true; } -static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, - unsigned int *packet_size) +static int drbd_recv_header(struct drbd_conf *mdev, struct packet_info *pi) { struct p_header *h = &mdev->tconn->data.rbuf.header; int r; @@ -956,7 +961,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, return false; } - r = decode_header(mdev, h, cmd, packet_size); + r = decode_header(mdev, h, pi); mdev->tconn->last_received = jiffies; return r; @@ -3580,6 +3585,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, int err; int ok = false; struct p_header *h = &mdev->tconn->data.rbuf.header; + struct packet_info pi; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information @@ -3633,8 +3639,10 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, goto out; break; } - if (!drbd_recv_header(mdev, &cmd, &data_size)) + if (!drbd_recv_header(mdev, &pi)) goto out; + cmd = pi.cmd; + data_size = pi.size; } INFO_bm_xfer_stats(mdev, "receive", &c); @@ -3762,24 +3770,23 @@ static struct data_cmd drbd_cmd_handler[] = { static void drbdd(struct drbd_conf *mdev) { struct p_header *header = &mdev->tconn->data.rbuf.header; - unsigned int packet_size; - enum drbd_packet cmd; + struct packet_info pi; 
size_t shs; /* sub header size */ int rv; while (get_t_state(&mdev->tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); - if (!drbd_recv_header(mdev, &cmd, &packet_size)) + if (!drbd_recv_header(mdev, &pi)) goto err_out; - if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) { - dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size); + if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { + dev_err(DEV, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } - shs = drbd_cmd_handler[cmd].pkt_size - sizeof(struct p_header); - if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { - dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); + shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header); + if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) { + dev_err(DEV, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } @@ -3792,11 +3799,11 @@ static void drbdd(struct drbd_conf *mdev) } } - rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs); + rv = drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs); if (unlikely(!rv)) { dev_err(DEV, "error receiving %s, l: %d!\n", - cmdname(cmd), packet_size); + cmdname(pi.cmd), pi.size); goto err_out; } } @@ -3989,27 +3996,26 @@ static int drbd_do_handshake(struct drbd_conf *mdev) /* ASSERT current == mdev->tconn->receiver ... 
*/ struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); - unsigned int length; - enum drbd_packet cmd; + struct packet_info pi; int rv; rv = drbd_send_handshake(mdev->tconn); if (!rv) return 0; - rv = drbd_recv_header(mdev, &cmd, &length); + rv = drbd_recv_header(mdev, &pi); if (!rv) return 0; - if (cmd != P_HAND_SHAKE) { + if (pi.cmd != P_HAND_SHAKE) { dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", - cmdname(cmd), cmd); + cmdname(pi.cmd), pi.cmd); return -1; } - if (length != expect) { + if (pi.size != expect) { dev_err(DEV, "expected HandShake length: %u, received: %u\n", - expect, length); + expect, pi.size); return -1; } @@ -4071,8 +4077,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; - enum drbd_packet cmd; - unsigned int length; + struct packet_info pi; int rv; desc.tfm = mdev->tconn->cram_hmac_tfm; @@ -4092,33 +4097,33 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &cmd, &length); + rv = drbd_recv_header(mdev, &pi); if (!rv) goto fail; - if (cmd != P_AUTH_CHALLENGE) { + if (pi.cmd != P_AUTH_CHALLENGE) { dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", - cmdname(cmd), cmd); + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } - if (length > CHALLENGE_LEN * 2) { + if (pi.size > CHALLENGE_LEN * 2) { dev_err(DEV, "expected AuthChallenge payload too big.\n"); rv = -1; goto fail; } - peers_ch = kmalloc(length, GFP_NOIO); + peers_ch = kmalloc(pi.size, GFP_NOIO); if (peers_ch == NULL) { dev_err(DEV, "kmalloc of peers_ch failed\n"); rv = -1; goto fail; } - rv = drbd_recv(mdev->tconn, peers_ch, length); + rv = drbd_recv(mdev->tconn, peers_ch, pi.size); - if (rv != length) { + if (rv != pi.size) { if (!signal_pending(current)) dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv); 
rv = 0; @@ -4134,7 +4139,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) } sg_init_table(&sg, 1); - sg_set_buf(&sg, peers_ch, length); + sg_set_buf(&sg, peers_ch, pi.size); rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { @@ -4147,18 +4152,18 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &cmd, &length); + rv = drbd_recv_header(mdev, &pi); if (!rv) goto fail; - if (cmd != P_AUTH_RESPONSE) { + if (pi.cmd != P_AUTH_RESPONSE) { dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", - cmdname(cmd), cmd); + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } - if (length != resp_size) { + if (pi.size != resp_size) { dev_err(DEV, "expected AuthResponse payload of wrong size\n"); rv = 0; goto fail; @@ -4544,14 +4549,14 @@ int drbd_asender(struct drbd_thread *thi) struct drbd_conf *mdev = thi->mdev; struct p_header *h = &mdev->tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; + struct packet_info pi; int rv; void *buf = h; int received = 0; int expect = sizeof(struct p_header); int ping_timeout_active = 0; - int empty, pkt_size; - enum drbd_packet cmd_nr; + int empty; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4640,25 +4645,25 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (!decode_header(mdev, h, &cmd_nr, &pkt_size)) + if (!decode_header(mdev, h, &pi)) goto reconnect; - cmd = get_asender_cmd(cmd_nr); + cmd = get_asender_cmd(pi.cmd); if (unlikely(cmd == NULL)) { dev_err(DEV, "unknown command %d on meta (l: %d)\n", - cmd_nr, pkt_size); + pi.cmd, pi.size); goto disconnect; } expect = cmd->pkt_size; - if (pkt_size != expect - sizeof(struct p_header)) { + if (pi.size != expect - sizeof(struct p_header)) { dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n", - cmd_nr, pkt_size); + pi.cmd, pi.size); goto reconnect; } } if (received == expect) { mdev->tconn->last_received = jiffies; D_ASSERT(cmd != NULL); - if 
(!cmd->process(mdev, cmd_nr)) + if (!cmd->process(mdev, pi.cmd)) goto reconnect; /* the idle_timeout (ping-int) From ce24385342d21bd22c95d2f7162f71df313d0dea Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:27:47 +0100 Subject: [PATCH 084/609] drbd: Converted decode_header() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 380d24e8434..7d210548a98 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -930,7 +930,7 @@ out_release_sockets: return -1; } -static bool decode_header(struct drbd_conf *mdev, struct p_header *h, struct packet_info *pi) +static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi) { if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { pi->cmd = be16_to_cpu(h->h80.command); @@ -940,7 +940,7 @@ static bool decode_header(struct drbd_conf *mdev, struct p_header *h, struct pac pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff; pi->vnr = 0; } else { - dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", + conn_err(tconn, "magic?? 
on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), be16_to_cpu(h->h80.command), be16_to_cpu(h->h80.length)); @@ -961,7 +961,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, struct packet_info *pi) return false; } - r = decode_header(mdev, h, pi); + r = decode_header(mdev->tconn, h, pi); mdev->tconn->last_received = jiffies; return r; @@ -4645,7 +4645,7 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (!decode_header(mdev, h, &pi)) + if (!decode_header(mdev->tconn, h, &pi)) goto reconnect; cmd = get_asender_cmd(pi.cmd); if (unlikely(cmd == NULL)) { From 9ba7aa00ae574714c4decf8f3e0dcdb679a3239e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:32:41 +0100 Subject: [PATCH 085/609] drbd: Converted drbd_recv_header() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7d210548a98..ebd8320d123 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -949,20 +949,20 @@ static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct p return true; } -static int drbd_recv_header(struct drbd_conf *mdev, struct packet_info *pi) +static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_header *h = &mdev->tconn->data.rbuf.header; + struct p_header *h = &tconn->data.rbuf.header; int r; - r = drbd_recv(mdev->tconn, h, sizeof(*h)); + r = drbd_recv(tconn, h, sizeof(*h)); if (unlikely(r != sizeof(*h))) { if (!signal_pending(current)) - dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); + conn_warn(tconn, "short read expecting header on sock: r=%d\n", r); return false; } - r = decode_header(mdev->tconn, h, pi); - mdev->tconn->last_received = jiffies; + r = 
decode_header(tconn, h, pi); + tconn->last_received = jiffies; return r; } @@ -3639,7 +3639,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, goto out; break; } - if (!drbd_recv_header(mdev, &pi)) + if (!drbd_recv_header(mdev->tconn, &pi)) goto out; cmd = pi.cmd; data_size = pi.size; @@ -3776,7 +3776,7 @@ static void drbdd(struct drbd_conf *mdev) while (get_t_state(&mdev->tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); - if (!drbd_recv_header(mdev, &pi)) + if (!drbd_recv_header(mdev->tconn, &pi)) goto err_out; if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { @@ -4003,7 +4003,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) if (!rv) return 0; - rv = drbd_recv_header(mdev, &pi); + rv = drbd_recv_header(mdev->tconn, &pi); if (!rv) return 0; @@ -4097,7 +4097,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &pi); + rv = drbd_recv_header(mdev->tconn, &pi); if (!rv) goto fail; @@ -4152,7 +4152,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &pi); + rv = drbd_recv_header(mdev->tconn, &pi); if (!rv) goto fail; From 65d11ed6f2430498bf3735d40a9e243409780fb1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:35:59 +0100 Subject: [PATCH 086/609] drbd: Converted drbd_do_handshake() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ebd8320d123..0a4d15c913e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -60,7 +60,7 @@ enum finish_epoch { FE_RECYCLED, }; -static int drbd_do_handshake(struct drbd_conf *mdev); +static int drbd_do_handshake(struct drbd_tconn *tconn); 
static int drbd_do_auth(struct drbd_conf *mdev); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); @@ -883,7 +883,7 @@ retry: D_ASSERT(mdev->tconn->asender.task == NULL); - h = drbd_do_handshake(mdev); + h = drbd_do_handshake(mdev->tconn); if (h <= 0) return h; @@ -3991,39 +3991,39 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) * -1 peer talks different language, * no point in trying again, please go standalone. */ -static int drbd_do_handshake(struct drbd_conf *mdev) +static int drbd_do_handshake(struct drbd_tconn *tconn) { - /* ASSERT current == mdev->tconn->receiver ... */ - struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; + /* ASSERT current == tconn->receiver ... */ + struct p_handshake *p = &tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); struct packet_info pi; int rv; - rv = drbd_send_handshake(mdev->tconn); + rv = drbd_send_handshake(tconn); if (!rv) return 0; - rv = drbd_recv_header(mdev->tconn, &pi); + rv = drbd_recv_header(tconn, &pi); if (!rv) return 0; if (pi.cmd != P_HAND_SHAKE) { - dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", + conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); return -1; } if (pi.size != expect) { - dev_err(DEV, "expected HandShake length: %u, received: %u\n", + conn_err(tconn, "expected HandShake length: %u, received: %u\n", expect, pi.size); return -1; } - rv = drbd_recv(mdev->tconn, &p->head.payload, expect); + rv = drbd_recv(tconn, &p->head.payload, expect); if (rv != expect) { if (!signal_pending(current)) - dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv); + conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv); return 0; } @@ -4036,15 +4036,15 @@ static int drbd_do_handshake(struct drbd_conf *mdev) PRO_VERSION_MIN > p->protocol_max) goto incompat; - mdev->tconn->agreed_pro_version = min_t(int, 
PRO_VERSION_MAX, p->protocol_max); + tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); - dev_info(DEV, "Handshake successful: " - "Agreed network protocol version %d\n", mdev->tconn->agreed_pro_version); + conn_info(tconn, "Handshake successful: " + "Agreed network protocol version %d\n", tconn->agreed_pro_version); return 1; incompat: - dev_err(DEV, "incompatible DRBD dialects: " + conn_err(tconn, "incompatible DRBD dialects: " "I support %d-%d, peer supports %d-%d\n", PRO_VERSION_MIN, PRO_VERSION_MAX, p->protocol_min, p->protocol_max); From 611208706f28c502c8c01791ac4f0b14cde395b2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 09:50:54 +0100 Subject: [PATCH 087/609] drbd: Converted drbd_(get|put)_data_sock() and drbd_send_cmd2() to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 20 ++++++++++---------- drivers/block/drbd/drbd_main.c | 26 +++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 4 ++-- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 845ff34d206..f48fe76f015 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1109,26 +1109,26 @@ static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) /* returns 1 if it was successful, * returns 0 if there was no data socket. * so wherever you are going to use the data.socket, e.g. do - * if (!drbd_get_data_sock(mdev)) + * if (!drbd_get_data_sock(mdev->tconn)) * return 0; * CODE(); - * drbd_put_data_sock(mdev); + * drbd_put_data_sock(mdev->tconn); */ -static inline int drbd_get_data_sock(struct drbd_conf *mdev) +static inline int drbd_get_data_sock(struct drbd_tconn *tconn) { - mutex_lock(&mdev->tconn->data.mutex); + mutex_lock(&tconn->data.mutex); /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()...
*/ - if (unlikely(mdev->tconn->data.socket == NULL)) { - mutex_unlock(&mdev->tconn->data.mutex); + if (unlikely(tconn->data.socket == NULL)) { + mutex_unlock(&tconn->data.mutex); return 0; } return 1; } -static inline void drbd_put_data_sock(struct drbd_conf *mdev) +static inline void drbd_put_data_sock(struct drbd_tconn *tconn) { - mutex_unlock(&mdev->tconn->data.mutex); + mutex_unlock(&tconn->data.mutex); } /* @@ -1171,12 +1171,12 @@ extern int drbd_send_state(struct drbd_conf *mdev); extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags); +extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, + char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, enum drbd_packet cmd, struct p_header *h, size_t size); -extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, - char *data, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2a67e272b16..2703504c7c1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -727,23 +727,23 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, return ok; } -int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, char *data, +int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size) { - struct p_header h; + struct p_header80 h; int ok; - prepare_header(mdev, &h, cmd, size); + prepare_header80(&h, cmd, size); - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(tconn)) return 0; ok = (sizeof(h) == - drbd_send(mdev->tconn, mdev->tconn->data.socket, &h, sizeof(h), 0)); + drbd_send(tconn, tconn->data.socket, &h, 
sizeof(h), 0)); ok = ok && (size == - drbd_send(mdev->tconn, mdev->tconn->data.socket, data, size, 0)); + drbd_send(tconn, tconn->data.socket, data, size, 0)); - drbd_put_data_sock(mdev); + drbd_put_data_sock(tconn); return ok; } @@ -1188,10 +1188,10 @@ int drbd_send_bitmap(struct drbd_conf *mdev) { int err; - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return -1; err = !_drbd_send_bitmap(mdev); - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return err; } @@ -1505,7 +1505,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) void *dgb; int dgs; - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return 0; dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? @@ -1564,7 +1564,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) } */ } - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return ok; } @@ -1595,7 +1595,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL * in response to admin command or module unload. */ - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return 0; ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? 
MSG_MORE : 0); @@ -1607,7 +1607,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, if (ok) ok = _drbd_send_zc_ee(mdev, peer_req); - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return ok; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0a4d15c913e..b95f81e3278 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4093,7 +4093,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + rv = conn_send_cmd2(mdev->tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; @@ -4148,7 +4148,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size); + rv = conn_send_cmd2(mdev->tconn, P_AUTH_RESPONSE, response, resp_size); if (!rv) goto fail; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5be179ba0c7..f5c27bbd814 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1197,7 +1197,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (cancel) return 1; - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return 0; p->barrier = b->br_number; /* inc_ap_pending was done where this was queued. @@ -1205,7 +1205,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * or (on connection loss) in w_clear_epoch. 
*/ ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, &p->head, sizeof(*p), 0); - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return ok; } From 13e6037dc991b0664ebb89226d4b68aa820b1fcd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 09:54:40 +0100 Subject: [PATCH 088/609] drbd: Converted drbd_do_auth() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 58 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b95f81e3278..2a3a35be9fc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -61,7 +61,7 @@ enum finish_epoch { }; static int drbd_do_handshake(struct drbd_tconn *tconn); -static int drbd_do_auth(struct drbd_conf *mdev); +static int drbd_do_auth(struct drbd_tconn *tconn); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); static int e_end_block(struct drbd_conf *, struct drbd_work *, int); @@ -889,7 +889,7 @@ retry: if (mdev->tconn->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ - switch (drbd_do_auth(mdev)) { + switch (drbd_do_auth(mdev->tconn)) { case -1: dev_err(DEV, "Authentication of peer failed\n"); return -1; @@ -4052,7 +4052,7 @@ static int drbd_do_handshake(struct drbd_tconn *tconn) } #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) -static int drbd_do_auth(struct drbd_conf *mdev) +static int drbd_do_auth(struct drbd_tconn *tconn) { dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); @@ -4067,73 +4067,73 @@ static int drbd_do_auth(struct drbd_conf *mdev) -1 - auth failed, don't try again. 
*/ -static int drbd_do_auth(struct drbd_conf *mdev) +static int drbd_do_auth(struct drbd_tconn *tconn) { char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ struct scatterlist sg; char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); + unsigned int key_len = strlen(tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; struct packet_info pi; int rv; - desc.tfm = mdev->tconn->cram_hmac_tfm; + desc.tfm = tconn->cram_hmac_tfm; desc.flags = 0; - rv = crypto_hash_setkey(mdev->tconn->cram_hmac_tfm, - (u8 *)mdev->tconn->net_conf->shared_secret, key_len); + rv = crypto_hash_setkey(tconn->cram_hmac_tfm, + (u8 *)tconn->net_conf->shared_secret, key_len); if (rv) { - dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv); rv = -1; goto fail; } get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = conn_send_cmd2(mdev->tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; - rv = drbd_recv_header(mdev->tconn, &pi); + rv = drbd_recv_header(tconn, &pi); if (!rv) goto fail; if (pi.cmd != P_AUTH_CHALLENGE) { - dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", + conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } if (pi.size > CHALLENGE_LEN * 2) { - dev_err(DEV, "expected AuthChallenge payload too big.\n"); + conn_err(tconn, "expected AuthChallenge payload too big.\n"); rv = -1; goto fail; } peers_ch = kmalloc(pi.size, GFP_NOIO); if (peers_ch == NULL) { - dev_err(DEV, "kmalloc of peers_ch failed\n"); + conn_err(tconn, "kmalloc of peers_ch failed\n"); rv = -1; goto fail; } - rv = drbd_recv(mdev->tconn, peers_ch, pi.size); + rv = drbd_recv(tconn, peers_ch, pi.size); if (rv != pi.size) { if (!signal_pending(current)) - 
dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv); + conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv); rv = 0; goto fail; } - resp_size = crypto_hash_digestsize(mdev->tconn->cram_hmac_tfm); + resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm); response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { - dev_err(DEV, "kmalloc of response failed\n"); + conn_err(tconn, "kmalloc of response failed\n"); rv = -1; goto fail; } @@ -4143,44 +4143,44 @@ static int drbd_do_auth(struct drbd_conf *mdev) rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { - dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } - rv = conn_send_cmd2(mdev->tconn, P_AUTH_RESPONSE, response, resp_size); + rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size); if (!rv) goto fail; - rv = drbd_recv_header(mdev->tconn, &pi); + rv = drbd_recv_header(tconn, &pi); if (!rv) goto fail; if (pi.cmd != P_AUTH_RESPONSE) { - dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", + conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } if (pi.size != resp_size) { - dev_err(DEV, "expected AuthResponse payload of wrong size\n"); + conn_err(tconn, "expected AuthResponse payload of wrong size\n"); rv = 0; goto fail; } - rv = drbd_recv(mdev->tconn, response , resp_size); + rv = drbd_recv(tconn, response , resp_size); if (rv != resp_size) { if (!signal_pending(current)) - dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv); + conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv); rv = 0; goto fail; } right_response = kmalloc(resp_size, GFP_NOIO); if (right_response == NULL) { - dev_err(DEV, "kmalloc of right_response failed\n"); + conn_err(tconn, "kmalloc of right_response failed\n"); rv = -1; goto fail; } @@ -4189,7 +4189,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) rv = 
crypto_hash_digest(&desc, &sg, sg.length, right_response); if (rv) { - dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } @@ -4197,8 +4197,8 @@ static int drbd_do_auth(struct drbd_conf *mdev) rv = !memcmp(response, right_response, resp_size); if (rv) - dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", - resp_size, mdev->tconn->net_conf->cram_hmac_alg); + conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n", + resp_size, tconn->net_conf->cram_hmac_alg); else rv = -1; From dc8228d107475bdf5458383f0d1fca202d82a184 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 10:13:15 +0100 Subject: [PATCH 089/609] drbd: Converted drbd_send_protocol() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 30 +++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f48fe76f015..ddd2ed7dec1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1161,7 +1161,7 @@ extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_conf *mdev); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); -extern int drbd_send_protocol(struct drbd_conf *mdev); +extern int drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2703504c7c1..01749e9731d 100644 --- a/drivers/block/drbd/drbd_main.c +++ 
b/drivers/block/drbd/drbd_main.c @@ -796,15 +796,15 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) return rv; } -int drbd_send_protocol(struct drbd_conf *mdev) +int drbd_send_protocol(struct drbd_tconn *tconn) { struct p_protocol *p; int size, cf, rv; size = sizeof(struct p_protocol); - if (mdev->tconn->agreed_pro_version >= 87) - size += strlen(mdev->tconn->net_conf->integrity_alg) + 1; + if (tconn->agreed_pro_version >= 87) + size += strlen(tconn->net_conf->integrity_alg) + 1; /* we must not recurse into our own queue, * as that is blocked during handshake */ @@ -812,30 +812,30 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (p == NULL) return 0; - p->protocol = cpu_to_be32(mdev->tconn->net_conf->wire_protocol); - p->after_sb_0p = cpu_to_be32(mdev->tconn->net_conf->after_sb_0p); - p->after_sb_1p = cpu_to_be32(mdev->tconn->net_conf->after_sb_1p); - p->after_sb_2p = cpu_to_be32(mdev->tconn->net_conf->after_sb_2p); - p->two_primaries = cpu_to_be32(mdev->tconn->net_conf->two_primaries); + p->protocol = cpu_to_be32(tconn->net_conf->wire_protocol); + p->after_sb_0p = cpu_to_be32(tconn->net_conf->after_sb_0p); + p->after_sb_1p = cpu_to_be32(tconn->net_conf->after_sb_1p); + p->after_sb_2p = cpu_to_be32(tconn->net_conf->after_sb_2p); + p->two_primaries = cpu_to_be32(tconn->net_conf->two_primaries); cf = 0; - if (mdev->tconn->net_conf->want_lose) + if (tconn->net_conf->want_lose) cf |= CF_WANT_LOSE; - if (mdev->tconn->net_conf->dry_run) { - if (mdev->tconn->agreed_pro_version >= 92) + if (tconn->net_conf->dry_run) { + if (tconn->agreed_pro_version >= 92) cf |= CF_DRY_RUN; else { - dev_err(DEV, "--dry-run is not supported by peer"); + conn_err(tconn, "--dry-run is not supported by peer"); kfree(p); return -1; } } p->conn_flags = cpu_to_be32(cf); - if (mdev->tconn->agreed_pro_version >= 87) - strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); + if (tconn->agreed_pro_version >= 87) + strcpy(p->integrity_alg, 
tconn->net_conf->integrity_alg); - rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, &p->head, size); + rv = conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header)); kfree(p); return rv; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2a3a35be9fc..05d6499da63 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -910,7 +910,7 @@ retry: drbd_thread_start(&mdev->tconn->asender); - if (drbd_send_protocol(mdev) == -1) + if (drbd_send_protocol(mdev->tconn) == -1) return -1; drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sizes(mdev, 0, 0); From 062e879c8b473d2dba270f8244a211b0c4dafe28 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 11:09:18 +0100 Subject: [PATCH 090/609] drbd: Use an idr data structure to map volume numbers to mdev pointers Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +++- drivers/block/drbd/drbd_main.c | 12 +++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ddd2ed7dec1..8d32f9dc18e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -916,8 +917,9 @@ struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ struct drbd_conf *volume0; /* TODO: Remove me again */ - unsigned long flags; + struct idr volumes; /* to mdev mapping */ + unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 01749e9731d..254e5c14137 100644 ---
a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2183,6 +2183,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); + idr_init(&tconn->volumes); write_lock_irq(&global_state_lock); list_add(&tconn->all_tconn, &drbd_tconns); @@ -2202,6 +2203,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_lock_irq(&global_state_lock); list_del(&tconn->all_tconn); write_unlock_irq(&global_state_lock); + idr_destroy(&tconn->volumes); kfree(tconn->name); kfree(tconn->int_dig_out); @@ -2216,6 +2218,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) struct gendisk *disk; struct request_queue *q; char conn_name[9]; /* drbd1234N */ + int vnr; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); @@ -2225,7 +2228,14 @@ struct drbd_conf *drbd_new_device(unsigned int minor) mdev->tconn = drbd_new_tconn(conn_name); if (!mdev->tconn) goto out_no_tconn; - + if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL)) + goto out_no_cpumask; + if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr)) + goto out_no_cpumask; + if (vnr != 0) { + dev_err(DEV, "vnr = %d\n", vnr); + goto out_no_cpumask; + } if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; From 907599e0446f03b66257cf79720cc0fc1f37b7e3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 11:25:37 +0100 Subject: [PATCH 091/609] drbd: Converted drbd_connect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 102 +++++++++++++++-------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 05d6499da63..28df7cd55b3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -746,6 +746,24 @@ static int drbd_socket_okay(struct socket **sock) } } +static 
int drbd_connected(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + int ok = 1; + + atomic_set(&mdev->packet_seq, 0); + mdev->peer_seq = 0; + + ok &= drbd_send_sync_param(mdev, &mdev->sync_conf); + ok &= drbd_send_sizes(mdev, 0, 0); + ok &= drbd_send_uuids(mdev); + ok &= drbd_send_state(mdev); + clear_bit(USE_DEGR_WFC_T, &mdev->flags); + clear_bit(RESIZE_PENDING, &mdev->flags); + + return !ok; +} + /* * return values: * 1 yes, we have a valid connection @@ -754,18 +772,16 @@ static int drbd_socket_okay(struct socket **sock) * no point in trying again, please go standalone. * -2 We do not have a network config... */ -static int drbd_connect(struct drbd_conf *mdev) +static int drbd_connect(struct drbd_tconn *tconn) { struct socket *s, *sock, *msock; int try, h, ok; - D_ASSERT(!mdev->tconn->data.socket); - - if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) + if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; - clear_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); - mdev->tconn->agreed_pro_version = 99; + clear_bit(DISCARD_CONCURRENT, &tconn->flags); + tconn->agreed_pro_version = 99; /* agreed_pro_version must be smaller than 100 so we send the old header (h80) in the first packet and in the handshake packet. */ @@ -775,7 +791,7 @@ static int drbd_connect(struct drbd_conf *mdev) do { for (try = 0;;) { /* 3 tries, this should take less than a second! 
*/ - s = drbd_try_connect(mdev->tconn); + s = drbd_try_connect(tconn); if (s || ++try >= 3) break; /* give the other side time to call bind() & listen() */ @@ -784,21 +800,21 @@ static int drbd_connect(struct drbd_conf *mdev) if (s) { if (!sock) { - drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_S); + drbd_send_fp(tconn, s, P_HAND_SHAKE_S); sock = s; s = NULL; } else if (!msock) { - drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_M); + drbd_send_fp(tconn, s, P_HAND_SHAKE_M); msock = s; s = NULL; } else { - dev_err(DEV, "Logic error in drbd_connect()\n"); + conn_err(tconn, "Logic error in drbd_connect()\n"); goto out_release_sockets; } } if (sock && msock) { - schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10); + schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10); ok = drbd_socket_okay(&sock); ok = drbd_socket_okay(&msock) && ok; if (ok) @@ -806,41 +822,41 @@ static int drbd_connect(struct drbd_conf *mdev) } retry: - s = drbd_wait_for_connect(mdev->tconn); + s = drbd_wait_for_connect(tconn); if (s) { - try = drbd_recv_fp(mdev->tconn, s); + try = drbd_recv_fp(tconn, s); drbd_socket_okay(&sock); drbd_socket_okay(&msock); switch (try) { case P_HAND_SHAKE_S: if (sock) { - dev_warn(DEV, "initial packet S crossed\n"); + conn_warn(tconn, "initial packet S crossed\n"); sock_release(sock); } sock = s; break; case P_HAND_SHAKE_M: if (msock) { - dev_warn(DEV, "initial packet M crossed\n"); + conn_warn(tconn, "initial packet M crossed\n"); sock_release(msock); } msock = s; - set_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); + set_bit(DISCARD_CONCURRENT, &tconn->flags); break; default: - dev_warn(DEV, "Error receiving initial packet\n"); + conn_warn(tconn, "Error receiving initial packet\n"); sock_release(s); if (random32() & 1) goto retry; } } - if (mdev->state.conn <= C_DISCONNECTING) + if (tconn->volume0->state.conn <= C_DISCONNECTING) goto out_release_sockets; if (signal_pending(current)) { flush_signals(current); smp_rmb(); - if 
(get_t_state(&mdev->tconn->receiver) == EXITING) + if (get_t_state(&tconn->receiver) == EXITING) goto out_release_sockets; } @@ -862,65 +878,53 @@ retry: msock->sk->sk_priority = TC_PRIO_INTERACTIVE; /* NOT YET ... - * sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; + * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * first set it to the P_HAND_SHAKE timeout, * which we set to 4x the configured ping_timeout. */ sock->sk->sk_sndtimeo = - sock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*4*HZ/10; + sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10; - msock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; - msock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; + msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; + msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; /* we don't want delays. * we use TCP_CORK where appropriate, though */ drbd_tcp_nodelay(sock); drbd_tcp_nodelay(msock); - mdev->tconn->data.socket = sock; - mdev->tconn->meta.socket = msock; - mdev->tconn->last_received = jiffies; + tconn->data.socket = sock; + tconn->meta.socket = msock; + tconn->last_received = jiffies; - D_ASSERT(mdev->tconn->asender.task == NULL); - - h = drbd_do_handshake(mdev->tconn); + h = drbd_do_handshake(tconn); if (h <= 0) return h; - if (mdev->tconn->cram_hmac_tfm) { + if (tconn->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ - switch (drbd_do_auth(mdev->tconn)) { + switch (drbd_do_auth(tconn)) { case -1: - dev_err(DEV, "Authentication of peer failed\n"); + conn_err(tconn, "Authentication of peer failed\n"); return -1; case 0: - dev_err(DEV, "Authentication of peer failed, trying again.\n"); + conn_err(tconn, "Authentication of peer failed, trying again.\n"); return 0; } } - if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) + if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) return 0; - 
sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; + sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - atomic_set(&mdev->packet_seq, 0); - mdev->peer_seq = 0; + drbd_thread_start(&tconn->asender); - drbd_thread_start(&mdev->tconn->asender); - - if (drbd_send_protocol(mdev->tconn) == -1) + if (drbd_send_protocol(tconn) == -1) return -1; - drbd_send_sync_param(mdev, &mdev->sync_conf); - drbd_send_sizes(mdev, 0, 0); - drbd_send_uuids(mdev); - drbd_send_state(mdev); - clear_bit(USE_DEGR_WFC_T, &mdev->flags); - clear_bit(RESIZE_PENDING, &mdev->flags); - mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ - return 1; + return !idr_for_each(&tconn->volumes, drbd_connected, tconn); out_release_sockets: if (sock) @@ -4222,7 +4226,7 @@ int drbdd_init(struct drbd_thread *thi) dev_info(DEV, "receiver (re)started\n"); do { - h = drbd_connect(mdev); + h = drbd_connect(mdev->tconn); if (h == 0) { drbd_disconnect(mdev); schedule_timeout_interruptible(HZ); From 808222845d62e551630699a1381bbf8a1fd4a286 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 12:46:30 +0100 Subject: [PATCH 092/609] drbd: Converted drbd_calc_cpu_mask() and drbd_thread_current_set_cpu() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++---- drivers/block/drbd/drbd_main.c | 25 +++++++++++++++---------- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 2 +- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8d32f9dc18e..1cb513e92b8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -946,6 +946,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; + cpumask_var_t 
cpu_mask; }; struct drbd_conf { @@ -1075,7 +1076,6 @@ struct drbd_conf { spinlock_t peer_seq_lock; unsigned int minor; unsigned long comm_bm_set; /* communicated number of set bits. */ - cpumask_var_t cpu_mask; struct bm_io_work bm_io_work; u64 ed_uuid; /* UUID of the exposed data */ struct mutex state_mutex; @@ -1149,10 +1149,10 @@ extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); #ifdef CONFIG_SMP -extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi); -extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); +extern void drbd_thread_current_set_cpu(struct drbd_thread *thi); +extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); #else -#define drbd_thread_current_set_cpu(A, B) ({}) +#define drbd_thread_current_set_cpu(A) ({}) #define drbd_calc_cpu_mask(A) ({}) #endif extern void drbd_free_resources(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 254e5c14137..3bb412c8272 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -606,6 +606,12 @@ char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) } #ifdef CONFIG_SMP +static int conn_lowest_minor(struct drbd_tconn *tconn) +{ + int minor = 0; + idr_get_next(&tconn->volumes, &minor); + return minor; +} /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * @mdev: DRBD device. @@ -613,23 +619,23 @@ char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) * Forces all threads of a device onto the same CPU. This is beneficial for * DRBD's performance. May be overwritten by user's configuration. */ -void drbd_calc_cpu_mask(struct drbd_conf *mdev) +void drbd_calc_cpu_mask(struct drbd_tconn *tconn) { int ord, cpu; /* user override. 
*/ - if (cpumask_weight(mdev->cpu_mask)) + if (cpumask_weight(tconn->cpu_mask)) return; - ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask); + ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask); for_each_online_cpu(cpu) { if (ord-- == 0) { - cpumask_set_cpu(cpu, mdev->cpu_mask); + cpumask_set_cpu(cpu, tconn->cpu_mask); return; } } /* should not be reached */ - cpumask_setall(mdev->cpu_mask); + cpumask_setall(tconn->cpu_mask); } /** @@ -640,14 +646,14 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev) * call in the "main loop" of _all_ threads, no need for any mutex, current won't die * prematurely. */ -void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi) +void drbd_thread_current_set_cpu(struct drbd_thread *thi) { struct task_struct *p = current; if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, mdev->cpu_mask); + set_cpus_allowed_ptr(p, thi->mdev->tconn->cpu_mask); } #endif @@ -2236,7 +2242,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) dev_err(DEV, "vnr = %d\n", vnr); goto out_no_cpumask; } - if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; mdev->tconn->volume0 = mdev; @@ -2313,7 +2319,7 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->cpu_mask); + free_cpumask_var(mdev->tconn->cpu_mask); out_no_cpumask: drbd_free_tconn(mdev->tconn); out_no_tconn: @@ -2332,7 +2338,6 @@ void drbd_free_mdev(struct drbd_conf *mdev) __free_page(mdev->md_io_page); put_disk(mdev->vdisk); blk_cleanup_queue(mdev->rq_queue); - free_cpumask_var(mdev->cpu_mask); kfree(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index df36a573cd4..331495fec67 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1884,9 +1884,9 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if 
(mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev, &sc); - if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { - cpumask_copy(mdev->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(mdev); + if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(mdev->tconn); mdev->tconn->receiver.reset_cpu_mask = 1; mdev->tconn->asender.reset_cpu_mask = 1; mdev->tconn->worker.reset_cpu_mask = 1; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 28df7cd55b3..c8d173c1139 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3779,7 +3779,7 @@ static void drbdd(struct drbd_conf *mdev) int rv; while (get_t_state(&mdev->tconn->receiver) == RUNNING) { - drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); + drbd_thread_current_set_cpu(&mdev->tconn->receiver); if (!drbd_recv_header(mdev->tconn, &pi)) goto err_out; @@ -4568,7 +4568,7 @@ int drbd_asender(struct drbd_thread *thi) current->rt_priority = 2; /* more important than all other tasks */ while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev, thi); + drbd_thread_current_set_cpu(thi); if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) { if (!drbd_send_ping(mdev)) { dev_err(DEV, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f5c27bbd814..16db1f47c60 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1634,7 +1634,7 @@ int drbd_worker(struct drbd_thread *thi) sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev, thi); + drbd_thread_current_set_cpu(thi); if (down_trylock(&mdev->tconn->data.work.s)) { mutex_lock(&mdev->tconn->data.mutex); From eefc2f7de2e4a35247c932a2c09f1890864a8381 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 12:55:24 
+0100 Subject: [PATCH 093/609] drbd: Converted drbdd() from mdev to tconn The drbd_md_sync(mdev) happens in the after state change anyways... Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +++++ drivers/block/drbd/drbd_receiver.c | 30 ++++++++++++++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1cb513e92b8..a51d0a46146 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1108,6 +1108,11 @@ static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) return mdev->minor; } +static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr) +{ + return (struct drbd_conf *)idr_find(&tconn->volumes, vnr); +} + /* returns 1 if it was successful, * returns 0 if there was no data socket. * so wherever you are going to use the data.socket, e.g. do diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c8d173c1139..4c61802c342 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -939,6 +939,7 @@ static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct p if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { pi->cmd = be16_to_cpu(h->h80.command); pi->size = be16_to_cpu(h->h80.length); + pi->vnr = 0; } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { pi->cmd = be16_to_cpu(h->h95.command); pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff; @@ -3771,42 +3772,42 @@ static struct data_cmd drbd_cmd_handler[] = { p_header, but they may not rely on that. Since there is also p_header95 ! 
*/ -static void drbdd(struct drbd_conf *mdev) +static void drbdd(struct drbd_tconn *tconn) { - struct p_header *header = &mdev->tconn->data.rbuf.header; + struct p_header *header = &tconn->data.rbuf.header; struct packet_info pi; size_t shs; /* sub header size */ int rv; - while (get_t_state(&mdev->tconn->receiver) == RUNNING) { - drbd_thread_current_set_cpu(&mdev->tconn->receiver); - if (!drbd_recv_header(mdev->tconn, &pi)) + while (get_t_state(&tconn->receiver) == RUNNING) { + drbd_thread_current_set_cpu(&tconn->receiver); + if (!drbd_recv_header(tconn, &pi)) goto err_out; if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { - dev_err(DEV, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); + conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header); if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) { - dev_err(DEV, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); + conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } if (shs) { - rv = drbd_recv(mdev->tconn, &header->payload, shs); + rv = drbd_recv(tconn, &header->payload, shs); if (unlikely(rv != shs)) { if (!signal_pending(current)) - dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); + conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv); goto err_out; } } - rv = drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs); + rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs); if (unlikely(!rv)) { - dev_err(DEV, "error receiving %s, l: %d!\n", + conn_err(tconn, "error receiving %s, l: %d!\n", cmdname(pi.cmd), pi.size); goto err_out; } @@ -3814,11 +3815,8 @@ static void drbdd(struct drbd_conf *mdev) if (0) { err_out: - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + drbd_force_state(tconn->volume0, NS(conn, C_PROTOCOL_ERROR)); } - /* If we leave here, 
we probably want to update at least the - * "Connected" indicator on stable storage. Do so explicitly here. */ - drbd_md_sync(mdev); } void drbd_flush_workqueue(struct drbd_tconn *tconn) @@ -4239,7 +4237,7 @@ int drbdd_init(struct drbd_thread *thi) if (h > 0) { if (get_net_conf(mdev->tconn)) { - drbdd(mdev); + drbdd(mdev->tconn); put_net_conf(mdev->tconn); } } From 360cc7405295d1f604d5689e8d6c206968d47886 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 14:29:53 +0100 Subject: [PATCH 094/609] drbd: Converted drbd_free_sock() and drbd_disconnect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 28 ++++++------ drivers/block/drbd/drbd_receiver.c | 72 +++++++++++++++++------------- 3 files changed, 56 insertions(+), 46 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a51d0a46146..a70365452d2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1165,7 +1165,7 @@ extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_conf *mdev); extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); -extern void drbd_free_sock(struct drbd_conf *mdev); +extern void drbd_free_sock(struct drbd_tconn *tconn); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); extern int drbd_send_protocol(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 3bb412c8272..a26ec93a9d7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2430,21 +2430,21 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) kfree(ldev); } -void drbd_free_sock(struct drbd_conf *mdev) +void drbd_free_sock(struct drbd_tconn *tconn) { - if (mdev->tconn->data.socket) { - 
mutex_lock(&mdev->tconn->data.mutex); - kernel_sock_shutdown(mdev->tconn->data.socket, SHUT_RDWR); - sock_release(mdev->tconn->data.socket); - mdev->tconn->data.socket = NULL; - mutex_unlock(&mdev->tconn->data.mutex); + if (tconn->data.socket) { + mutex_lock(&tconn->data.mutex); + kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR); + sock_release(tconn->data.socket); + tconn->data.socket = NULL; + mutex_unlock(&tconn->data.mutex); } - if (mdev->tconn->meta.socket) { - mutex_lock(&mdev->tconn->meta.mutex); - kernel_sock_shutdown(mdev->tconn->meta.socket, SHUT_RDWR); - sock_release(mdev->tconn->meta.socket); - mdev->tconn->meta.socket = NULL; - mutex_unlock(&mdev->tconn->meta.mutex); + if (tconn->meta.socket) { + mutex_lock(&tconn->meta.mutex); + kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR); + sock_release(tconn->meta.socket); + tconn->meta.socket = NULL; + mutex_unlock(&tconn->meta.mutex); } } @@ -2462,7 +2462,7 @@ void drbd_free_resources(struct drbd_conf *mdev) crypto_free_hash(mdev->tconn->integrity_r_tfm); mdev->tconn->integrity_r_tfm = NULL; - drbd_free_sock(mdev); + drbd_free_sock(mdev->tconn); __no_warn(local, drbd_free_bc(mdev->ldev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4c61802c342..2e5318f9422 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -62,6 +62,7 @@ enum finish_epoch { static int drbd_do_handshake(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_tconn *tconn); +static int drbd_disconnected(int vnr, void *p, void *data); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); static int e_end_block(struct drbd_conf *, struct drbd_work *, int); @@ -3829,19 +3830,49 @@ void drbd_flush_workqueue(struct drbd_tconn *tconn) wait_for_completion(&barr.done); } -static void drbd_disconnect(struct drbd_conf *mdev) +static void drbd_disconnect(struct drbd_tconn *tconn) { - enum drbd_fencing_p 
fp; union drbd_state os, ns; int rv = SS_UNKNOWN_ERROR; - unsigned int i; - if (mdev->state.conn == C_STANDALONE) + if (tconn->volume0->state.conn == C_STANDALONE) return; /* asender does not clean up anything. it must not interfere, either */ - drbd_thread_stop(&mdev->tconn->asender); - drbd_free_sock(mdev); + drbd_thread_stop(&tconn->asender); + drbd_free_sock(tconn); + + idr_for_each(&tconn->volumes, drbd_disconnected, tconn); + + conn_info(tconn, "Connection closed\n"); + + spin_lock_irq(&tconn->req_lock); + os = tconn->volume0->state; + if (os.conn >= C_UNCONNECTED) { + /* Do not restart in case we are C_DISCONNECTING */ + ns.i = os.i; + ns.conn = C_UNCONNECTED; + rv = _drbd_set_state(tconn->volume0, ns, CS_VERBOSE, NULL); + } + spin_unlock_irq(&tconn->req_lock); + + if (os.conn == C_DISCONNECTING) { + wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0); + + crypto_free_hash(tconn->cram_hmac_tfm); + tconn->cram_hmac_tfm = NULL; + + kfree(tconn->net_conf); + tconn->net_conf = NULL; + drbd_request_state(tconn->volume0, NS(conn, C_STANDALONE)); + } +} + +static int drbd_disconnected(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + enum drbd_fencing_p fp; + unsigned int i; /* wait for current activity to cease. 
*/ spin_lock_irq(&mdev->tconn->req_lock); @@ -3887,8 +3918,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (!is_susp(mdev->state)) tl_clear(mdev); - dev_info(DEV, "Connection closed\n"); - drbd_md_sync(mdev); fp = FP_DONT_CARE; @@ -3900,27 +3929,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) drbd_try_outdate_peer_async(mdev); - spin_lock_irq(&mdev->tconn->req_lock); - os = mdev->state; - if (os.conn >= C_UNCONNECTED) { - /* Do not restart in case we are C_DISCONNECTING */ - ns = os; - ns.conn = C_UNCONNECTED; - rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - } - spin_unlock_irq(&mdev->tconn->req_lock); - - if (os.conn == C_DISCONNECTING) { - wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0); - - crypto_free_hash(mdev->tconn->cram_hmac_tfm); - mdev->tconn->cram_hmac_tfm = NULL; - - kfree(mdev->tconn->net_conf); - mdev->tconn->net_conf = NULL; - drbd_request_state(mdev, NS(conn, C_STANDALONE)); - } - /* serialize with bitmap writeout triggered by the state change, * if any. 
*/ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); @@ -3950,6 +3958,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ atomic_set(&mdev->current_epoch->epoch_size, 0); D_ASSERT(list_empty(&mdev->current_epoch->list)); + + return 0; } /* @@ -4226,7 +4236,7 @@ int drbdd_init(struct drbd_thread *thi) do { h = drbd_connect(mdev->tconn); if (h == 0) { - drbd_disconnect(mdev); + drbd_disconnect(mdev->tconn); schedule_timeout_interruptible(HZ); } if (h == -1) { @@ -4242,7 +4252,7 @@ int drbdd_init(struct drbd_thread *thi) } } - drbd_disconnect(mdev); + drbd_disconnect(mdev->tconn); dev_info(DEV, "receiver terminated\n"); return 0; From a21e9298275a0145e43c2413725549112d99ba01 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 15:08:49 +0100 Subject: [PATCH 095/609] drbd: Moved the mdev member into drbd_work (from drbd_request and drbd_peer_request) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 ++ drivers/block/drbd/drbd_int.h | 5 ++--- drivers/block/drbd/drbd_main.c | 8 ++++++++ drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 10 ++++++---- drivers/block/drbd/drbd_req.c | 11 ++++++----- drivers/block/drbd/drbd_req.h | 4 ++-- drivers/block/drbd/drbd_state.c | 1 + drivers/block/drbd/drbd_worker.c | 8 ++++---- 9 files changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 794317778db..637a9378567 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -228,6 +228,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) al_work.enr = enr; al_work.old_enr = al_ext->lc_number; al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); wait_for_completion(&al_work.event); @@ -717,6 +718,7 @@ static void 
drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, if (udw) { udw->enr = ext->lce.lc_number; udw->w.cb = w_update_odbm; + udw->w.mdev = mdev; drbd_queue_work_front(&mdev->tconn->data.work, &udw->w); } else { dev_warn(DEV, "Could not kmalloc an udw\n"); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a70365452d2..be067bfbace 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -645,13 +645,13 @@ typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); struct drbd_work { struct list_head list; drbd_work_cb cb; + struct drbd_conf *mdev; }; #include "drbd_interval.h" struct drbd_request { struct drbd_work w; - struct drbd_conf *mdev; /* if local IO is not allowed, will be NULL. * if local IO _is_ allowed, holds the locally submitted bio clone, @@ -715,7 +715,6 @@ struct digest_info { struct drbd_peer_request { struct drbd_work w; struct drbd_epoch *epoch; /* for writes */ - struct drbd_conf *mdev; struct page *pages; atomic_t pending_bios; struct drbd_interval i; @@ -1537,7 +1536,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void drbd_flush_workqueue(struct drbd_tconn *tconn); +extern void drbd_flush_workqueue(struct drbd_conf *mdev); /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. 
*/ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a26ec93a9d7..e89ec80395d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1836,6 +1836,14 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) mdev->md_sync_work.cb = w_md_sync; mdev->bm_io_work.w.cb = w_bitmap_io; mdev->start_resync_work.cb = w_start_resync; + + mdev->resync_work.mdev = mdev; + mdev->unplug_work.mdev = mdev; + mdev->go_diskless.mdev = mdev; + mdev->md_sync_work.mdev = mdev; + mdev->bm_io_work.w.mdev = mdev; + mdev->start_resync_work.mdev = mdev; + init_timer(&mdev->resync_timer); init_timer(&mdev->md_sync_timer); init_timer(&mdev->start_resync_timer); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 331495fec67..0debe589b67 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -876,7 +876,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); drbd_thread_start(&mdev->tconn->worker); - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); } /* if still unconfigured, stops worker again. @@ -1076,7 +1076,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* also wait for the last barrier ack. */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); /* and for any other previously queued work */ - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); retcode = rv; /* FIXME: Type mismatch. 
*/ @@ -1520,7 +1520,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); spin_lock_irq(&mdev->tconn->req_lock); if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2e5318f9422..4aa75bad16c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -345,7 +345,7 @@ drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, peer_req->i.waiting = false; peer_req->epoch = NULL; - peer_req->mdev = mdev; + peer_req->w.mdev = mdev; peer_req->pages = page; atomic_set(&peer_req->pending_bios, 0); peer_req->flags = 0; @@ -3820,13 +3820,14 @@ static void drbdd(struct drbd_tconn *tconn) } } -void drbd_flush_workqueue(struct drbd_tconn *tconn) +void drbd_flush_workqueue(struct drbd_conf *mdev) { struct drbd_wq_barrier barr; barr.w.cb = w_prev_work_done; + barr.w.mdev = mdev; init_completion(&barr.done); - drbd_queue_work(&tconn->data.work, &barr.w); + drbd_queue_work(&mdev->tconn->data.work, &barr.w); wait_for_completion(&barr.done); } @@ -3906,7 +3907,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, * w_make_resync_request etc. which may still be on the worker queue * to be "canceled" */ - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); /* This also does reclaim_net_ee(). 
If we do this too early, we might * miss some resync ee and pages.*/ @@ -4507,6 +4508,7 @@ static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) w = kmalloc(sizeof(*w), GFP_NOIO); if (w) { w->cb = w_ov_finished; + w->mdev = mdev; drbd_queue_work_front(&mdev->tconn->data.work, w); } else { dev_err(DEV, "kmalloc(w) failed."); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 6bcf4171a76..45a543e5c6a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -67,7 +67,7 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, drbd_req_make_private_bio(req, bio_src); req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; - req->mdev = mdev; + req->w.mdev = mdev; req->master_bio = bio_src; req->epoch = 0; @@ -155,6 +155,7 @@ static void queue_barrier(struct drbd_conf *mdev) b = mdev->tconn->newest_tle; b->w.cb = w_send_barrier; + b->w.mdev = mdev; /* inc_ap_pending done here, so we won't * get imbalanced on connection loss. * dec_ap_pending will be done in got_BarrierAck @@ -192,7 +193,7 @@ void complete_master_bio(struct drbd_conf *mdev, static void drbd_remove_request_interval(struct rb_root *root, struct drbd_request *req) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct drbd_interval *i = &req->i; drbd_remove_interval(root, i); @@ -211,7 +212,7 @@ static void drbd_remove_request_interval(struct rb_root *root, void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; /* only WRITES may end up here without a master bio (on barrier ack) */ int rw = req->master_bio ? 
bio_data_dir(req->master_bio) : WRITE; @@ -294,7 +295,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; if (!is_susp(mdev->state)) _req_may_be_done(req, m); @@ -315,7 +316,7 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; int rv = 0; m->bio = NULL; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 431e3f962c3..e6232ce5a1c 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -255,7 +255,7 @@ extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); * outside the spinlock, e.g. when walking some list on cleanup. */ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; int rv; @@ -275,7 +275,7 @@ static inline int req_mod(struct drbd_request *req, enum drbd_req_event what) { unsigned long flags; - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; int rv; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 38d330b7b66..36679841af6 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -843,6 +843,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ascw->ns = ns; ascw->flags = flags; ascw->w.cb = w_after_state_ch; + ascw->w.mdev = mdev; ascw->done = done; drbd_queue_work(&mdev->tconn->data.work, &ascw->w); } else { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 16db1f47c60..cac65f67c14 100644 --- 
a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -83,7 +83,7 @@ void drbd_md_io_complete(struct bio *bio, int error) void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = peer_req->mdev; + struct drbd_conf *mdev = peer_req->w.mdev; spin_lock_irqsave(&mdev->tconn->req_lock, flags); mdev->read_cnt += peer_req->i.size >> 9; @@ -103,7 +103,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = peer_req->mdev; + struct drbd_conf *mdev = peer_req->w.mdev; sector_t e_sector; int do_wake; u64 block_id; @@ -155,7 +155,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel void drbd_endio_sec(struct bio *bio, int error) { struct drbd_peer_request *peer_req = bio->bi_private; - struct drbd_conf *mdev = peer_req->mdev; + struct drbd_conf *mdev = peer_req->w.mdev; int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; @@ -192,7 +192,7 @@ void drbd_endio_pri(struct bio *bio, int error) { unsigned long flags; struct drbd_request *req = bio->bi_private; - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); From f1b3a6ec7d2b3033b18c6ad125f5694c85599c4a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 15:35:58 +0100 Subject: [PATCH 096/609] drbd: Consolidated the setup of the thread name into the framework Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 3 +++ drivers/block/drbd/drbd_receiver.c | 5 ----- drivers/block/drbd/drbd_worker.c | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c 
b/drivers/block/drbd/drbd_main.c index e89ec80395d..0861746a747 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -446,6 +446,9 @@ static int drbd_thread_setup(void *arg) unsigned long flags; int retval; + snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", + thi->name[0], thi->mdev->tconn->name); + restart: retval = thi->function(thi); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4aa75bad16c..ab9b505c0f0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4227,11 +4227,8 @@ static int drbd_do_auth(struct drbd_tconn *tconn) int drbdd_init(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - unsigned int minor = mdev_to_minor(mdev); int h; - sprintf(current->comm, "drbd%d_receiver", minor); - dev_info(DEV, "receiver (re)started\n"); do { @@ -4572,8 +4569,6 @@ int drbd_asender(struct drbd_thread *thi) int ping_timeout_active = 0; int empty; - sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); - current->policy = SCHED_RR; /* Make this a realtime task! 
*/ current->rt_priority = 2; /* more important than all other tasks */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index cac65f67c14..6f709621ae2 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1631,8 +1631,6 @@ int drbd_worker(struct drbd_thread *thi) LIST_HEAD(work_list); int intr = 0, i; - sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); - while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); From 4d641dd7b027dd494c9ae72b0723f612aca621bd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 15:40:24 +0100 Subject: [PATCH 097/609] drbd: Converted drbdd_init() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ab9b505c0f0..16d33315e92 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4226,33 +4226,33 @@ static int drbd_do_auth(struct drbd_tconn *tconn) int drbdd_init(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->mdev->tconn; int h; - dev_info(DEV, "receiver (re)started\n"); + conn_info(tconn, "receiver (re)started\n"); do { - h = drbd_connect(mdev->tconn); + h = drbd_connect(tconn); if (h == 0) { - drbd_disconnect(mdev->tconn); + drbd_disconnect(tconn); schedule_timeout_interruptible(HZ); } if (h == -1) { - dev_warn(DEV, "Discarding network configuration.\n"); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_warn(tconn, "Discarding network configuration.\n"); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } } while (h == 0); if (h > 0) { - if (get_net_conf(mdev->tconn)) { - drbdd(mdev->tconn); - put_net_conf(mdev->tconn); + if (get_net_conf(tconn)) { + drbdd(tconn); + put_net_conf(tconn); 
} } - drbd_disconnect(mdev->tconn); + drbd_disconnect(tconn); - dev_info(DEV, "receiver terminated\n"); + conn_info(tconn, "receiver terminated\n"); return 0; } From 32862ec705d4b8ecf37e41cc65aa1bd84f990c75 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 16:41:01 +0100 Subject: [PATCH 098/609] drbd: Converted drbd_asender() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 125 ++++++++++++++++------------- 1 file changed, 69 insertions(+), 56 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 16d33315e92..f0cd3819fff 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -392,9 +392,7 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) } -/* - * This function is called from _asender only_ - * but see also comments in _req_mod(,BARRIER_ACKED) +/* See also comments in _req_mod(,BARRIER_ACKED) * and receive_Barrier. * * Move entries from net_ee to done_ee, if ready. 
@@ -4555,66 +4553,85 @@ static struct asender_cmd *get_asender_cmd(int cmd) return &asender_tbl[cmd]; } +static int _drbd_process_done_ee(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + return !drbd_process_done_ee(mdev); +} + +static int _check_ee_empty(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + struct drbd_tconn *tconn = mdev->tconn; + int not_empty; + + spin_lock_irq(&tconn->req_lock); + not_empty = !list_empty(&mdev->done_ee); + spin_unlock_irq(&tconn->req_lock); + + return not_empty; +} + +static int tconn_process_done_ee(struct drbd_tconn *tconn) +{ + int not_empty, err; + + do { + clear_bit(SIGNAL_ASENDER, &tconn->flags); + flush_signals(current); + err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL); + if (err) + return err; + set_bit(SIGNAL_ASENDER, &tconn->flags); + not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL); + } while (not_empty); + + return 0; +} + int drbd_asender(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; - struct p_header *h = &mdev->tconn->meta.rbuf.header; + struct drbd_tconn *tconn = thi->mdev->tconn; + struct p_header *h = &tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; struct packet_info pi; - int rv; void *buf = h; int received = 0; int expect = sizeof(struct p_header); int ping_timeout_active = 0; - int empty; current->policy = SCHED_RR; /* Make this a realtime task! 
*/ current->rt_priority = 2; /* more important than all other tasks */ while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) { - if (!drbd_send_ping(mdev)) { - dev_err(DEV, "drbd_send_ping has failed\n"); + if (test_and_clear_bit(SEND_PING, &tconn->flags)) { + if (!drbd_send_ping(tconn->volume0)) { + conn_err(tconn, "drbd_send_ping has failed\n"); goto reconnect; } - mdev->tconn->meta.socket->sk->sk_rcvtimeo = - mdev->tconn->net_conf->ping_timeo*HZ/10; + tconn->meta.socket->sk->sk_rcvtimeo = + tconn->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; } - /* conditionally cork; - * it may hurt latency if we cork without much to send */ - if (!mdev->tconn->net_conf->no_cork && - 3 < atomic_read(&mdev->unacked_cnt)) - drbd_tcp_cork(mdev->tconn->meta.socket); - while (1) { - clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); - flush_signals(current); - if (!drbd_process_done_ee(mdev)) - goto reconnect; - /* to avoid race with newly queued ACKs */ - set_bit(SIGNAL_ASENDER, &mdev->tconn->flags); - spin_lock_irq(&mdev->tconn->req_lock); - empty = list_empty(&mdev->done_ee); - spin_unlock_irq(&mdev->tconn->req_lock); - /* new ack may have been queued right here, - * but then there is also a signal pending, - * and we start over... */ - if (empty) - break; - } + /* TODO: conditionally cork; it may hurt latency if we cork without + much to send */ + if (!tconn->net_conf->no_cork) + drbd_tcp_cork(tconn->meta.socket); + if (tconn_process_done_ee(tconn)) + goto reconnect; /* but unconditionally uncork unless disabled */ - if (!mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->tconn->meta.socket); + if (!tconn->net_conf->no_cork) + drbd_tcp_uncork(tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. 
*/ if (signal_pending(current)) continue; - rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0); - clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); + rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0); + clear_bit(SIGNAL_ASENDER, &tconn->flags); flush_signals(current); @@ -4632,47 +4649,46 @@ int drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { - dev_err(DEV, "meta connection shut down by peer.\n"); + conn_err(tconn, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { /* If the data socket received something meanwhile, * that is good enough: peer is still alive. */ - if (time_after(mdev->tconn->last_received, - jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo)) + if (time_after(tconn->last_received, + jiffies - tconn->meta.socket->sk->sk_rcvtimeo)) continue; if (ping_timeout_active) { - dev_err(DEV, "PingAck did not arrive in time.\n"); + conn_err(tconn, "PingAck did not arrive in time.\n"); goto reconnect; } - set_bit(SEND_PING, &mdev->tconn->flags); + set_bit(SEND_PING, &tconn->flags); continue; } else if (rv == -EINTR) { continue; } else { - dev_err(DEV, "sock_recvmsg returned %d\n", rv); + conn_err(tconn, "sock_recvmsg returned %d\n", rv); goto reconnect; } if (received == expect && cmd == NULL) { - if (!decode_header(mdev->tconn, h, &pi)) + if (!decode_header(tconn, h, &pi)) goto reconnect; cmd = get_asender_cmd(pi.cmd); if (unlikely(cmd == NULL)) { - dev_err(DEV, "unknown command %d on meta (l: %d)\n", + conn_err(tconn, "unknown command %d on meta (l: %d)\n", pi.cmd, pi.size); goto disconnect; } expect = cmd->pkt_size; if (pi.size != expect - sizeof(struct p_header)) { - dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n", + conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n", pi.cmd, pi.size); goto reconnect; } } if (received == expect) { - mdev->tconn->last_received = jiffies; - D_ASSERT(cmd != NULL); - if (!cmd->process(mdev, pi.cmd)) 
+ tconn->last_received = jiffies; + if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd)) goto reconnect; /* the idle_timeout (ping-int) @@ -4689,18 +4705,15 @@ int drbd_asender(struct drbd_thread *thi) if (0) { reconnect: - drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); - drbd_md_sync(mdev); + drbd_force_state(tconn->volume0, NS(conn, C_NETWORK_FAILURE)); } if (0) { disconnect: - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - drbd_md_sync(mdev); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } - clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); + clear_bit(SIGNAL_ASENDER, &tconn->flags); - D_ASSERT(mdev->state.conn < C_CONNECTED); - dev_info(DEV, "asender terminated\n"); + conn_info(tconn, "asender terminated\n"); return 0; } From 19393e105f9702a014d3ce08bce92b3ad9cf96b5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 10:09:07 +0100 Subject: [PATCH 099/609] drbd: Converted drbd_worker() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 97 +++++++++++++++++--------------- 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6f709621ae2..c9b10d6eb88 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1624,35 +1624,53 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_state_unlock(mdev); } +static int _worker_dying(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + + D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); + /* _drbd_set_state only uses stop_nowait. + * wait here for the exiting receiver. 
*/ + drbd_thread_stop(&mdev->tconn->receiver); + drbd_mdev_cleanup(mdev); + + clear_bit(DEVICE_DYING, &mdev->flags); + clear_bit(CONFIG_PENDING, &mdev->flags); + wake_up(&mdev->state_wait); + + return 0; +} + int drbd_worker(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->mdev->tconn; struct drbd_work *w = NULL; LIST_HEAD(work_list); - int intr = 0, i; + int intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - if (down_trylock(&mdev->tconn->data.work.s)) { - mutex_lock(&mdev->tconn->data.mutex); - if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->tconn->data.socket); - mutex_unlock(&mdev->tconn->data.mutex); + if (down_trylock(&tconn->data.work.s)) { + mutex_lock(&tconn->data.mutex); + if (tconn->data.socket && !tconn->net_conf->no_cork) + drbd_tcp_uncork(tconn->data.socket); + mutex_unlock(&tconn->data.mutex); - intr = down_interruptible(&mdev->tconn->data.work.s); + intr = down_interruptible(&tconn->data.work.s); - mutex_lock(&mdev->tconn->data.mutex); - if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_cork(mdev->tconn->data.socket); - mutex_unlock(&mdev->tconn->data.mutex); + mutex_lock(&tconn->data.mutex); + if (tconn->data.socket && !tconn->net_conf->no_cork) + drbd_tcp_cork(tconn->data.socket); + mutex_unlock(&tconn->data.mutex); } if (intr) { - D_ASSERT(intr == -EINTR); flush_signals(current); - if (!expect(get_t_state(thi) != RUNNING)) + if (get_t_state(thi) == RUNNING) { + conn_warn(tconn, "Worker got an unexpected signal\n"); continue; + } break; } @@ -1663,8 +1681,8 @@ int drbd_worker(struct drbd_thread *thi) this... */ w = NULL; - spin_lock_irq(&mdev->tconn->data.work.q_lock); - if (!expect(!list_empty(&mdev->tconn->data.work.q))) { + spin_lock_irq(&tconn->data.work.q_lock); + if (list_empty(&tconn->data.work.q)) { /* something terribly wrong in our logic. 
* we were able to down() the semaphore, * but the list is empty... doh. @@ -1676,57 +1694,44 @@ int drbd_worker(struct drbd_thread *thi) * * I'll try to get away just starting over this loop. */ - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + conn_warn(tconn, "Work list unexpectedly empty\n"); + spin_unlock_irq(&tconn->data.work.q_lock); continue; } - w = list_entry(mdev->tconn->data.work.q.next, struct drbd_work, list); + w = list_entry(tconn->data.work.q.next, struct drbd_work, list); list_del_init(&w->list); - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { + if (!w->cb(w->mdev, w, tconn->volume0->state.conn < C_CONNECTED)) { /* dev_warn(DEV, "worker: a callback failed! \n"); */ - if (mdev->state.conn >= C_CONNECTED) - drbd_force_state(mdev, - NS(conn, C_NETWORK_FAILURE)); + if (tconn->volume0->state.conn >= C_CONNECTED) + drbd_force_state(tconn->volume0, + NS(conn, C_NETWORK_FAILURE)); } } - D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); - D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); - spin_lock_irq(&mdev->tconn->data.work.q_lock); - i = 0; - while (!list_empty(&mdev->tconn->data.work.q)) { - list_splice_init(&mdev->tconn->data.work.q, &work_list); - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + spin_lock_irq(&tconn->data.work.q_lock); + while (!list_empty(&tconn->data.work.q)) { + list_splice_init(&tconn->data.work.q, &work_list); + spin_unlock_irq(&tconn->data.work.q_lock); while (!list_empty(&work_list)) { w = list_entry(work_list.next, struct drbd_work, list); list_del_init(&w->list); - w->cb(mdev, w, 1); - i++; /* dead debugging code */ + w->cb(w->mdev, w, 1); } - spin_lock_irq(&mdev->tconn->data.work.q_lock); + spin_lock_irq(&tconn->data.work.q_lock); } - sema_init(&mdev->tconn->data.work.s, 0); + sema_init(&tconn->data.work.s, 0); /* DANGEROUS race: if someone did queue his work within the spinlock, * but up() ed outside the spinlock, we 
could get an up() on the * semaphore without corresponding list entry. * So don't do that. */ - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + spin_unlock_irq(&tconn->data.work.q_lock); - D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); - /* _drbd_set_state only uses stop_nowait. - * wait here for the exiting receiver. */ - drbd_thread_stop(&mdev->tconn->receiver); - drbd_mdev_cleanup(mdev); - - dev_info(DEV, "worker terminated\n"); - - clear_bit(DEVICE_DYING, &mdev->flags); - clear_bit(CONFIG_PENDING, &mdev->flags); - wake_up(&mdev->state_wait); + idr_for_each(&tconn->volumes, _worker_dying, NULL); return 0; } From 392c8801922f51466045ece2f1f2884b8c9cd9a2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 10:33:31 +0100 Subject: [PATCH 100/609] drbd: drbd_thread has now a pointer to a tconn instead of to a mdev Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 8 +++--- drivers/block/drbd/drbd_int.h | 4 +-- drivers/block/drbd/drbd_main.c | 43 ++++++++++++++---------------- drivers/block/drbd/drbd_receiver.c | 4 +-- drivers/block/drbd/drbd_worker.c | 2 +- 5 files changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e85221f22ad..e8d652f197c 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,9 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev, current), + drbd_task_to_thread_name(mdev->tconn, current), func, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev, b->bm_task)); + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -138,9 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum 
bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev, current), + drbd_task_to_thread_name(mdev->tconn, current), why, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev, b->bm_task)); + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index be067bfbace..91054e4d0b2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -625,7 +625,7 @@ struct drbd_thread { struct completion stop; enum drbd_thread_state t_state; int (*function) (struct drbd_thread *); - struct drbd_conf *mdev; + struct drbd_tconn *tconn; int reset_cpu_mask; char name[9]; }; @@ -1151,7 +1151,7 @@ enum dds_flags { extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); -extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); +extern char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_thread *thi); extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0861746a747..2c44cc36dee 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -442,12 +442,12 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; unsigned long flags; int retval; snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", - thi->name[0], thi->mdev->tconn->name); + thi->name[0], thi->tconn->name); restart: 
retval = thi->function(thi); @@ -465,7 +465,7 @@ restart: */ if (thi->t_state == RESTARTING) { - dev_info(DEV, "Restarting %s thread\n", thi->name); + conn_info(tconn, "Restarting %s thread\n", thi->name); thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; @@ -477,27 +477,27 @@ restart: complete(&thi->stop); spin_unlock_irqrestore(&thi->t_lock, flags); - dev_info(DEV, "Terminating %s\n", current->comm); + conn_info(tconn, "Terminating %s\n", current->comm); /* Release mod reference taken when thread was started */ module_put(THIS_MODULE); return retval; } -static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, +static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi, int (*func) (struct drbd_thread *), char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; thi->t_state = NONE; thi->function = func; - thi->mdev = mdev; + thi->tconn = tconn; strncpy(thi->name, name, ARRAY_SIZE(thi->name)); } int drbd_thread_start(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; struct task_struct *nt; unsigned long flags; @@ -507,28 +507,27 @@ int drbd_thread_start(struct drbd_thread *thi) switch (thi->t_state) { case NONE: - dev_info(DEV, "Starting %s thread (from %s [%d])\n", + conn_info(tconn, "Starting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { - dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); + conn_err(tconn, "Failed to get module reference in drbd_thread_start\n"); spin_unlock_irqrestore(&thi->t_lock, flags); return false; } init_completion(&thi->stop); - D_ASSERT(thi->task == NULL); thi->reset_cpu_mask = 1; thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); flush_signals(current); /* otherw. 
may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, - "drbd%d_%s", mdev_to_minor(mdev), thi->name); + "drbd_%c_%s", thi->name[0], thi->tconn->name); if (IS_ERR(nt)) { - dev_err(DEV, "Couldn't start thread\n"); + conn_err(tconn, "Couldn't start thread\n"); module_put(THIS_MODULE); return false; @@ -541,7 +540,7 @@ int drbd_thread_start(struct drbd_thread *thi) break; case EXITING: thi->t_state = RESTARTING; - dev_info(DEV, "Restarting %s thread (from %s [%d])\n", + conn_info(tconn, "Restarting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* fall through */ case RUNNING: @@ -582,7 +581,6 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) init_completion(&thi->stop); if (thi->task != current) force_sig(DRBD_SIGKILL, thi->task); - } spin_unlock_irqrestore(&thi->t_lock, flags); @@ -591,9 +589,8 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) wait_for_completion(&thi->stop); } -static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct task_struct *task) +static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task) { - struct drbd_tconn *tconn = mdev->tconn; struct drbd_thread *thi = task == tconn->receiver.task ? &tconn->receiver : task == tconn->asender.task ? &tconn->asender : @@ -602,9 +599,9 @@ static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct ta return thi; } -char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) +char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task) { - struct drbd_thread *thi = drbd_task_to_thread(mdev, task); + struct drbd_thread *thi = drbd_task_to_thread(tconn, task); return thi ? 
thi->name : task->comm; } @@ -656,7 +653,7 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, thi->mdev->tconn->cpu_mask); + set_cpus_allowed_ptr(p, thi->tconn->cpu_mask); } #endif @@ -1866,10 +1863,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init, "receiver"); - drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker, "worker"); - drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender, "asender"); - /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; @@ -2202,6 +2195,10 @@ struct drbd_tconn *drbd_new_tconn(char *name) init_waitqueue_head(&tconn->net_cnt_wait); idr_init(&tconn->volumes); + drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); + drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); + drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + write_lock_irq(&global_state_lock); list_add(&tconn->all_tconn, &drbd_tconns); write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f0cd3819fff..f21b0efff6d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4224,7 +4224,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) int drbdd_init(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; int h; conn_info(tconn, "receiver (re)started\n"); @@ -4591,7 +4591,7 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) int drbd_asender(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; struct p_header *h = &tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; 
struct packet_info pi; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c9b10d6eb88..3f0f84a56ee 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1643,7 +1643,7 @@ static int _worker_dying(int vnr, void *p, void *data) int drbd_worker(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; LIST_HEAD(work_list); int intr = 0; From 6699b6553374e85785fada94ac1e8dfc5629b02e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 11:10:24 +0100 Subject: [PATCH 101/609] drbd: Moved some initializing code into drbd_new_tconn() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2c44cc36dee..b3f5a02cdb8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1801,17 +1801,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->ap_in_flight, 0); mutex_init(&mdev->md_io_mutex); - mutex_init(&mdev->tconn->data.mutex); - mutex_init(&mdev->tconn->meta.mutex); - sema_init(&mdev->tconn->data.work.s, 0); - sema_init(&mdev->tconn->meta.work.s, 0); mutex_init(&mdev->state_mutex); - spin_lock_init(&mdev->tconn->data.work.q_lock); - spin_lock_init(&mdev->tconn->meta.work.q_lock); - spin_lock_init(&mdev->al_lock); - spin_lock_init(&mdev->tconn->req_lock); spin_lock_init(&mdev->peer_seq_lock); spin_lock_init(&mdev->epoch_lock); @@ -1821,8 +1813,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->read_ee); INIT_LIST_HEAD(&mdev->net_ee); INIT_LIST_HEAD(&mdev->resync_reads); - INIT_LIST_HEAD(&mdev->tconn->data.work.q); - INIT_LIST_HEAD(&mdev->tconn->meta.work.q); INIT_LIST_HEAD(&mdev->resync_work.list); 
INIT_LIST_HEAD(&mdev->unplug_work.list); INIT_LIST_HEAD(&mdev->go_diskless.list); @@ -2179,6 +2169,13 @@ out: return r; } +static void drbd_init_workqueue(struct drbd_work_queue* wq) +{ + sema_init(&wq->s, 0); + spin_lock_init(&wq->q_lock); + INIT_LIST_HEAD(&wq->q); +} + struct drbd_tconn *drbd_new_tconn(char *name) { struct drbd_tconn *tconn; @@ -2191,10 +2188,17 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); idr_init(&tconn->volumes); + drbd_init_workqueue(&tconn->data.work); + mutex_init(&tconn->data.mutex); + + drbd_init_workqueue(&tconn->meta.work); + mutex_init(&tconn->meta.mutex); + drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); From 00d56944ff086f895e9ad184a7785ca1eece4a3b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 18:09:48 +0100 Subject: [PATCH 102/609] drbd: Generalized the work callbacks Work callbacks no longer have to operate on an mdev. From now on they can also operate on a tconn. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 8 +-- drivers/block/drbd/drbd_int.h | 45 ++++++++-------- drivers/block/drbd/drbd_main.c | 17 ++++--- drivers/block/drbd/drbd_receiver.c | 13 +++-- drivers/block/drbd/drbd_state.c | 5 +- drivers/block/drbd/drbd_worker.c | 82 ++++++++++++++++++------------ 6 files changed, 100 insertions(+), 70 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 637a9378567..0748871d6b1 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -63,7 +63,7 @@ struct drbd_atodb_wait { }; -int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); +int w_al_write_transaction(struct drbd_work *, int); static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, @@ -291,9 +291,10 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) } int -w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) +w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); + struct drbd_conf *mdev = w->mdev; struct lc_element *updated = aw->al_ext; const unsigned int new_enr = aw->enr; const unsigned int evicted = aw->old_enr; @@ -612,9 +613,10 @@ void drbd_al_shrink(struct drbd_conf *mdev) wake_up(&mdev->al_wait); } -static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); + struct drbd_conf *mdev = w->mdev; if (!get_ldev(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 91054e4d0b2..8f9cc9d1bf9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -641,11 +641,14 @@ static inline enum drbd_thread_state get_t_state(struct 
drbd_thread *thi) } struct drbd_work; -typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); +typedef int (*drbd_work_cb)(struct drbd_work *, int cancel); struct drbd_work { struct list_head list; drbd_work_cb cb; - struct drbd_conf *mdev; + union { + struct drbd_conf *mdev; + struct drbd_tconn *tconn; + }; }; #include "drbd_interval.h" @@ -1495,25 +1498,25 @@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio * extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ -extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); -extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); -extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int); -extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); -extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int); -extern int w_start_resync(struct drbd_conf *, struct 
drbd_work *, int); +extern int w_req_cancel_conflict(struct drbd_work *, int); +extern int w_read_retry_remote(struct drbd_work *, int); +extern int w_e_end_data_req(struct drbd_work *, int); +extern int w_e_end_rsdata_req(struct drbd_work *, int); +extern int w_e_end_csum_rs_req(struct drbd_work *, int); +extern int w_e_end_ov_reply(struct drbd_work *, int); +extern int w_e_end_ov_req(struct drbd_work *, int); +extern int w_ov_finished(struct drbd_work *, int); +extern int w_resync_timer(struct drbd_work *, int); +extern int w_resume_next_sg(struct drbd_work *, int); +extern int w_send_write_hint(struct drbd_work *, int); +extern int w_send_dblock(struct drbd_work *, int); +extern int w_send_barrier(struct drbd_work *, int); +extern int w_send_read_req(struct drbd_work *, int); +extern int w_prev_work_done(struct drbd_work *, int); +extern int w_e_reissue(struct drbd_work *, int); +extern int w_restart_disk_io(struct drbd_work *, int); +extern int w_send_oos(struct drbd_work *, int); +extern int w_start_resync(struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); extern void start_resync_timer_fn(unsigned long data); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b3f5a02cdb8..d418bca2bb1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -64,10 +64,10 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_md_sync(struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); -static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); -static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_bitmap_io(struct drbd_work *w, int unused); +static int w_go_diskless(struct 
drbd_work *w, int unused); MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); @@ -2790,9 +2790,10 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) return rv; } -static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_bitmap_io(struct drbd_work *w, int unused) { struct bm_io_work *work = container_of(w, struct bm_io_work, w); + struct drbd_conf *mdev = w->mdev; int rv = -EIO; D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); @@ -2835,8 +2836,10 @@ void drbd_ldev_destroy(struct drbd_conf *mdev) clear_bit(GO_DISKLESS, &mdev->flags); } -static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_go_diskless(struct drbd_work *w, int unused) { + struct drbd_conf *mdev = w->mdev; + D_ASSERT(mdev->state.disk == D_FAILED); /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will * inc/dec it frequently. Once we are D_DISKLESS, no one will touch @@ -2949,8 +2952,10 @@ static void md_sync_timer_fn(unsigned long data) drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); } -static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_md_sync(struct drbd_work *w, int unused) { + struct drbd_conf *mdev = w->mdev; + dev_warn(DEV, "md_sync_timer expired! 
Worker calls drbd_md_sync().\n"); #ifdef DEBUG dev_warn(DEV, "last md_mark_dirty: %s:%u\n", diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f21b0efff6d..02fa1b25dce 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -65,7 +65,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(int vnr, void *p, void *data); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); -static int e_end_block(struct drbd_conf *, struct drbd_work *, int); +static int e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) @@ -420,7 +420,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) */ list_for_each_entry_safe(peer_req, t, &work_list, w.list) { /* list_del not necessary, next/prev members not touched */ - ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok; + ok = peer_req->w.cb(&peer_req->w, !ok) && ok; drbd_free_ee(mdev, peer_req); } wake_up(&mdev->ee_wait); @@ -1447,9 +1447,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, /* e_end_resync_block() is called via * drbd_process_done_ee() by asender only */ -static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_end_resync_block(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok; @@ -1584,9 +1585,10 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, /* e_end_block() is called via drbd_process_done_ee(). 
* this means this function only runs in the asender thread */ -static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int e_end_block(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok = 1, pcmd; @@ -1621,9 +1623,10 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_send_discard_ack(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_conf *mdev = w->mdev; int ok = 1; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 36679841af6..30a3a1de07c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -40,7 +40,7 @@ struct after_state_chg_work { extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, @@ -853,10 +853,11 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_after_state_ch(struct drbd_work *w, int unused) { struct after_state_chg_work *ascw = container_of(w, struct after_state_chg_work, w); + struct drbd_conf *mdev = w->mdev; after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); if (ascw->flags & 
CS_WAIT_COMPLETE) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3f0f84a56ee..418f44ad9a8 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -38,9 +38,8 @@ #include "drbd_int.h" #include "drbd_req.h" -static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); -static int w_make_resync_request(struct drbd_conf *mdev, - struct drbd_work *w, int cancel); +static int w_make_ov_request(struct drbd_work *w, int cancel); +static int w_make_resync_request(struct drbd_work *w, int cancel); @@ -228,9 +227,10 @@ void drbd_endio_pri(struct bio *bio, int error) complete_master_bio(mdev, &m); } -int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_read_retry_remote(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; /* We should not detach for read io-error, * but try to WRITE the P_DATA_REPLY to the failed location, @@ -244,7 +244,7 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } spin_unlock_irq(&mdev->tconn->req_lock); - return w_send_read_req(mdev, w, 0); + return w_send_read_req(w, 0); } void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, @@ -295,11 +295,10 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * crypto_hash_final(&desc, digest); } -/* TODO merge common code with w_e_end_ov_req */ -int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_e_send_csum(struct drbd_work *w, int cancel) { - struct drbd_peer_request *peer_req = - container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; int digest_size; void *digest; int ok = 1; @@ -383,14 +382,15 @@ defer: return -EAGAIN; } -int w_resync_timer(struct drbd_conf *mdev, 
struct drbd_work *w, int cancel) +int w_resync_timer(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; switch (mdev->state.conn) { case C_VERIFY_S: - w_make_ov_request(mdev, w, cancel); + w_make_ov_request(w, cancel); break; case C_SYNC_TARGET: - w_make_resync_request(mdev, w, cancel); + w_make_resync_request(w, cancel); break; } @@ -504,9 +504,9 @@ static int drbd_rs_number_requests(struct drbd_conf *mdev) return number; } -static int w_make_resync_request(struct drbd_conf *mdev, - struct drbd_work *w, int cancel) +static int w_make_resync_request(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; unsigned long bit; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -664,8 +664,9 @@ next_sector: return 1; } -static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_make_ov_request(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -707,8 +708,9 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca return 1; } -int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_ov_finished(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; kfree(w); ov_oos_print(mdev); drbd_resync_finished(mdev); @@ -716,8 +718,9 @@ int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } -static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_resync_finished(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; kfree(w); drbd_resync_finished(mdev); @@ -901,9 +904,10 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_requ * @w: work object. 
* @cancel: The connection will be closed anyways */ -int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_data_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -937,9 +941,10 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * @w: work object. * @cancel: The connection will be closed anyways */ -int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_rsdata_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -985,9 +990,10 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; struct digest_info *di; int digest_size; void *digest = NULL; @@ -1047,10 +1053,10 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -/* TODO merge common code with w_e_send_csum */ -int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_ov_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; int digest_size; @@ -1105,9 +1111,10 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) drbd_set_out_of_sync(mdev, sector, size); } -int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_ov_reply(struct 
drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; struct digest_info *di; void *digest; sector_t sector = peer_req->i.sector; @@ -1172,16 +1179,18 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_prev_work_done(struct drbd_work *w, int cancel) { struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); + complete(&b->done); return 1; } -int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_barrier(struct drbd_work *w, int cancel) { struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); + struct drbd_conf *mdev = w->mdev; struct p_barrier *p = &mdev->tconn->data.sbuf.barrier; int ok = 1; @@ -1210,16 +1219,18 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_write_hint(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; if (cancel) return 1; return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } -int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_oos(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -1239,9 +1250,10 @@ int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * @w: work object. 
* @cancel: The connection will be closed anyways */ -int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -1261,9 +1273,10 @@ int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * @w: work object. * @cancel: The connection will be closed anyways */ -int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -1285,9 +1298,10 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_restart_disk_io(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) drbd_al_begin_io(mdev, req->i.sector); @@ -1447,8 +1461,10 @@ void start_resync_timer_fn(unsigned long data) drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); } -int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_start_resync(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; + if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { dev_warn(DEV, "w_start_resync later...\n"); mdev->start_resync_timer.expires = jiffies + HZ/10; @@ -1702,7 +1718,7 @@ int drbd_worker(struct drbd_thread *thi) list_del_init(&w->list); spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(w->mdev, w, tconn->volume0->state.conn < C_CONNECTED)) { + if (!w->cb(w, tconn->volume0->state.conn < C_CONNECTED)) { /* dev_warn(DEV, "worker: 
a callback failed! \n"); */ if (tconn->volume0->state.conn >= C_CONNECTED) drbd_force_state(tconn->volume0, @@ -1718,7 +1734,7 @@ int drbd_worker(struct drbd_thread *thi) while (!list_empty(&work_list)) { w = list_entry(work_list.next, struct drbd_work, list); list_del_init(&w->list); - w->cb(w->mdev, w, 1); + w->cb(w, 1); } spin_lock_irq(&tconn->data.work.q_lock); From 2a67d8b93b3363d4a5608d16d510a4bf6b3863fb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 14:10:32 +0100 Subject: [PATCH 103/609] drbd: Converted drbd_send_ping() and related functions from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 21 ++++++++++++++------- drivers/block/drbd/drbd_main.c | 17 +++++++++-------- drivers/block/drbd/drbd_receiver.c | 11 ++++++----- drivers/block/drbd/drbd_state.c | 1 + drivers/block/drbd/drbd_worker.c | 10 ++++++---- 5 files changed, 36 insertions(+), 24 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8f9cc9d1bf9..e2b59f58a0a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -790,7 +790,6 @@ enum { RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ - GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ NEW_CUR_UUID, /* Create new current UUID when thawing IO */ AL_SUSPENDED, /* Activity logging is currently suspended. */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ @@ -913,6 +912,7 @@ enum { DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! 
*/ SEND_PING, /* whether asender should send a ping asap */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ + GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ }; struct drbd_tconn { /* is a resource from the config file */ @@ -925,6 +925,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; + wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ @@ -1180,12 +1181,12 @@ extern int drbd_send_state(struct drbd_conf *mdev); extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags); +extern int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, + enum drbd_packet cmd, struct p_header *h, size_t size); extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 -extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packet cmd, struct p_header *h, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); @@ -1886,6 +1887,12 @@ static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); } +static inline int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, + enum drbd_packet cmd, struct p_header *h, size_t size) +{ + return conn_send_cmd(mdev->tconn, mdev->vnr, use_data_socket, cmd, h, size); +} + static inline int drbd_send_short_cmd(struct drbd_conf 
*mdev, enum drbd_packet cmd) { @@ -1893,16 +1900,16 @@ static inline int drbd_send_short_cmd(struct drbd_conf *mdev, return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); } -static inline int drbd_send_ping(struct drbd_conf *mdev) +static inline int drbd_send_ping(struct drbd_tconn *tconn) { struct p_header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); + return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING, &h, sizeof(h)); } -static inline int drbd_send_ping_ack(struct drbd_conf *mdev) +static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) { struct p_header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); + return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } static inline void drbd_thread_stop(struct drbd_thread *thi) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d418bca2bb1..b43ad87a536 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -707,29 +707,29 @@ int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, /* don't pass the socket. we may only look at it * when we hold the appropriate socket mutex. */ -int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, +int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, enum drbd_packet cmd, struct p_header *h, size_t size) { int ok = 0; struct socket *sock; if (use_data_socket) { - mutex_lock(&mdev->tconn->data.mutex); - sock = mdev->tconn->data.socket; + mutex_lock(&tconn->data.mutex); + sock = tconn->data.socket; } else { - mutex_lock(&mdev->tconn->meta.mutex); - sock = mdev->tconn->meta.socket; + mutex_lock(&tconn->meta.mutex); + sock = tconn->meta.socket; } /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()... 
*/ if (likely(sock != NULL)) - ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0); + ok = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); if (use_data_socket) - mutex_unlock(&mdev->tconn->data.mutex); + mutex_unlock(&tconn->data.mutex); else - mutex_unlock(&mdev->tconn->meta.mutex); + mutex_unlock(&tconn->meta.mutex); return ok; } @@ -2191,6 +2191,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); + init_waitqueue_head(&tconn->ping_wait); idr_init(&tconn->volumes); drbd_init_workqueue(&tconn->data.work); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 02fa1b25dce..2b69a15a55d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4279,16 +4279,17 @@ static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd) { - return drbd_send_ping_ack(mdev); + return drbd_send_ping_ack(mdev->tconn); } static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd) { + struct drbd_tconn *tconn = mdev->tconn; /* restore idle timeout */ - mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; - if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) - wake_up(&mdev->misc_wait); + tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; + if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags)) + wake_up(&tconn->ping_wait); return true; } @@ -4610,7 +4611,7 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); if (test_and_clear_bit(SEND_PING, &tconn->flags)) { - if (!drbd_send_ping(tconn->volume0)) { + if (!drbd_send_ping(tconn)) { conn_err(tconn, "drbd_send_ping has failed\n"); goto reconnect; } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 30a3a1de07c..d5777159a2b 100644 --- 
a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -737,6 +737,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); + wake_up(&mdev->tconn->ping_wait); /* aborted verify run. log the last position */ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 418f44ad9a8..8539df25bc2 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -730,10 +730,12 @@ static int w_resync_finished(struct drbd_work *w, int cancel) static void ping_peer(struct drbd_conf *mdev) { - clear_bit(GOT_PING_ACK, &mdev->flags); - request_ping(mdev->tconn); - wait_event(mdev->misc_wait, - test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); + struct drbd_tconn *tconn = mdev->tconn; + + clear_bit(GOT_PING_ACK, &tconn->flags); + request_ping(tconn); + wait_event(tconn->ping_wait, + test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED); } int drbd_resync_finished(struct drbd_conf *mdev) From d50eee21c45769252f0b54a5804e8b2db735d288 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 14:38:52 +0100 Subject: [PATCH 104/609] drbd: Extracted after_conn_state_ch() out of after_state_ch() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d5777159a2b..18feba6a27e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -45,10 +45,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static enum 
drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); /** * cl_wide_st_chg() - true if the state change is a cluster wide one @@ -98,6 +94,15 @@ void drbd_force_state(struct drbd_conf *mdev, drbd_change_state(mdev, CS_HARD, mask, val); } +static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, + union drbd_state, + union drbd_state); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort); +int drbd_send_state_req(struct drbd_conf *, + union drbd_state, union drbd_state); + static enum drbd_state_rv _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) @@ -123,7 +128,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (!rv) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) { - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ } @@ -166,7 +171,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (rv < SS_SUCCESS) { @@ -339,13 +344,14 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } /** - * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible + * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible * @mdev: DRBD device. * @ns: new state. 
* @os: old state. */ static enum drbd_state_rv -is_valid_soft_transition(union drbd_state os, union drbd_state ns) +is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, + union drbd_state os) { enum drbd_state_rv rv = SS_SUCCESS; @@ -651,9 +657,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, this happen...*/ if (is_valid_state(mdev, os) == rv) - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); } else - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); } if (rv < SS_SUCCESS) { From a75f34ad0cb37f6fc03ffb109a9a702668b67fe2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 15:10:33 +0100 Subject: [PATCH 105/609] drbd: Renamed is_valid_state_transition() to is_valid_soft_transition() And removed the unused mdev parameter, and made the order of the state parameters: os, ns Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 18feba6a27e..d5777159a2b 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -45,6 +45,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); +static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort); /** * cl_wide_st_chg() - true if the state change is a cluster wide one @@ -94,15 +98,6 @@ void drbd_force_state(struct drbd_conf *mdev, 
drbd_change_state(mdev, CS_HARD, mask, val); } -static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, - union drbd_state, - union drbd_state); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); -int drbd_send_state_req(struct drbd_conf *, - union drbd_state, union drbd_state); - static enum drbd_state_rv _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) @@ -128,7 +123,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (!rv) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) { - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ } @@ -171,7 +166,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (rv < SS_SUCCESS) { @@ -344,14 +339,13 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } /** - * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible + * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible * @mdev: DRBD device. * @ns: new state. * @os: old state. 
*/ static enum drbd_state_rv -is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, - union drbd_state os) +is_valid_soft_transition(union drbd_state os, union drbd_state ns) { enum drbd_state_rv rv = SS_SUCCESS; @@ -657,9 +651,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, this happen...*/ if (is_valid_state(mdev, os) == rv) - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); } else - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); } if (rv < SS_SUCCESS) { From 3509502dc88ce8226b29bea5e25edf066eca9a8a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 16:29:33 +0100 Subject: [PATCH 106/609] drbd: Extracted is_valid_transition() out of sanitize_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 71 +++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d5777159a2b..3199bf92e46 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -47,6 +47,7 @@ static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, const char **warn_sync_abort); @@ -112,15 +113,17 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) return SS_CW_FAILED_BY_PEER; - rv = 0; spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; ns = 
sanitize_state(mdev, os, ns, NULL); + rv = is_valid_transition(os, ns); + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ if (!cl_wide_st_chg(mdev, os, ns)) rv = SS_CW_NO_NEED; - if (!rv) { + if (rv == SS_UNKNOWN_ERROR) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) { rv = is_valid_soft_transition(os, ns); @@ -160,8 +163,10 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); + rv = is_valid_transition(os, ns); + if (rv < SS_SUCCESS) + goto abort; if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); @@ -340,6 +345,8 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) /** * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible + * This function limits state transitions that may be declined by DRBD. I.e. + * user requests (aka soft transitions). * @mdev: DRBD device. * @ns: new state. * @os: old state. @@ -389,6 +396,40 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) return rv; } +/** + * is_valid_transition() - Returns an SS_ error code if the state transition is not possible + * This limits hard state transitions. Hard state transitions are facts there are + * imposed on DRBD by the environment. E.g. disk broke or network broke down. + * But those hard state transitions are still not allowed to do everything. + * @ns: new state. + * @os: old state. + */ +static enum drbd_state_rv +is_valid_transition(union drbd_state os, union drbd_state ns) +{ + enum drbd_state_rv rv = SS_SUCCESS; + + /* Disallow Network errors to configure a device's network part */ + if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && + os.conn <= C_DISCONNECTING) + rv = SS_NEED_CONNECTION; + + /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. 
*/ + if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && + ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) + rv = SS_IN_TRANSIENT_STATE; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) + rv = SS_IN_TRANSIENT_STATE; + + /* we cannot fail (again) if we already detached */ + if (ns.disk == D_FAILED && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; + + return rv; +} + /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition * @mdev: DRBD device. @@ -411,30 +452,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state put_ldev(mdev); } - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - ns.conn = os.conn; - - /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. - * If you try to go into some Sync* state, that shall fail (elsewhere). */ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) - ns.conn = os.conn; - - /* we cannot fail (again) if we already detached */ - if (ns.disk == D_FAILED && os.disk == D_DISKLESS) - ns.disk = D_DISKLESS; - /* if we are only D_ATTACHING yet, * we can (and should) go directly to D_DISKLESS. 
*/ if (ns.disk == D_FAILED && os.disk == D_ATTACHING) ns.disk = D_DISKLESS; - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - ns.conn = os.conn; - + /* Implications from connection to peer and peer_isp */ if (ns.conn < C_CONNECTED) { ns.peer_isp = 0; ns.peer = R_UNKNOWN; @@ -641,6 +664,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (ns.i == os.i) return SS_NOTHING_TO_DO; + rv = is_valid_transition(os, ns); + if (rv < SS_SUCCESS) + return rv; + if (!(flags & CS_HARD)) { /* pre-state-change checks ; only look at ns */ /* See drbd_state_sw_errors in drbd_strings.c */ From fda74117dc7f07b844c398157f1ed398f3bc02da Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 10:38:06 +0100 Subject: [PATCH 107/609] drbd: Extracted is_valid_conn_transition() out of is_valid_transition() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 37 ++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 3199bf92e46..b381faade0a 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -396,6 +396,27 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) return rv; } +static enum drbd_state_rv +is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc) +{ + enum drbd_state_rv rv = SS_SUCCESS; + + /* Disallow Network errors to configure a device's network part */ + if ((nc >= C_TIMEOUT && nc <= C_TEAR_DOWN) && oc <= C_DISCONNECTING) + rv = SS_NEED_CONNECTION; + + /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. 
*/ + if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING) + rv = SS_IN_TRANSIENT_STATE; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (oc == C_DISCONNECTING && nc != C_STANDALONE) + rv = SS_IN_TRANSIENT_STATE; + + return rv; +} + + /** * is_valid_transition() - Returns an SS_ error code if the state transition is not possible * This limits hard state transitions. Hard state transitions are facts there are @@ -407,21 +428,9 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns) { - enum drbd_state_rv rv = SS_SUCCESS; + enum drbd_state_rv rv; - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - rv = SS_NEED_CONNECTION; - - /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) - rv = SS_IN_TRANSIENT_STATE; - - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - rv = SS_IN_TRANSIENT_STATE; + rv = is_valid_conn_transition(os.conn, ns.conn); /* we cannot fail (again) if we already detached */ if (ns.disk == D_FAILED && os.disk == D_DISKLESS) From 4308a0a390deee88b6de6dbf8b05bcf0f506bbcf Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 11:24:38 +0100 Subject: [PATCH 108/609] drbd: Removed the os parameter from sanitize_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 39 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b381faade0a..02516ed9127 100644 --- a/drivers/block/drbd/drbd_state.c +++
b/drivers/block/drbd/drbd_state.c @@ -48,8 +48,8 @@ static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, + const char **warn_sync_abort); /** * cl_wide_st_chg() - true if the state change is a cluster wide one @@ -116,7 +116,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); + ns = sanitize_state(mdev, ns, NULL); rv = is_valid_transition(os, ns); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ @@ -163,7 +163,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); + ns = sanitize_state(mdev, ns, NULL); rv = is_valid_transition(os, ns); if (rv < SS_SUCCESS) goto abort; @@ -436,6 +436,13 @@ is_valid_transition(union drbd_state os, union drbd_state ns) if (ns.disk == D_FAILED && os.disk == D_DISKLESS) rv = SS_IS_DISKLESS; + /* if we are only D_ATTACHING yet, + * we can (and should) go directly to D_DISKLESS. */ + if (ns.disk == D_FAILED && os.disk == D_ATTACHING) { + printk("TODO: FIX ME\n"); + rv = SS_IS_DISKLESS; + } + return rv; } @@ -449,8 +456,8 @@ is_valid_transition(union drbd_state os, union drbd_state ns) * When we loose connection, we have to set the state of the peers disk (pdsk) * to D_UNKNOWN. 
This rule and many more along those lines are in this function. */ -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort) +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, + const char **warn_sync_abort) { enum drbd_fencing_p fp; enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; @@ -461,11 +468,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state put_ldev(mdev); } - /* if we are only D_ATTACHING yet, - * we can (and should) go directly to D_DISKLESS. */ - if (ns.disk == D_FAILED && os.disk == D_ATTACHING) - ns.disk = D_DISKLESS; - /* Implications from connection to peer and peer_isp */ if (ns.conn < C_CONNECTED) { ns.peer_isp = 0; @@ -478,12 +480,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) ns.aftr_isp = 0; + /* An implication of the disk states onto the connection state */ /* Abort resync if a disk fails/detaches */ - if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && - (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { + if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { if (warn_sync_abort) *warn_sync_abort = - os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? + ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? 
"Online-verify" : "Resync"; ns.conn = C_CONNECTED; } @@ -591,13 +593,11 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state } if (fp == FP_STONITH && - (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && - !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) + (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && - !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { @@ -668,8 +668,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, os = mdev->state; - ns = sanitize_state(mdev, os, ns, &warn_sync_abort); - + ns = sanitize_state(mdev, ns, &warn_sync_abort); if (ns.i == os.i) return SS_NOTHING_TO_DO; From 56707f9e873108c0173b4edf20ea452e1d2a89d2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Feb 2011 14:57:50 +0100 Subject: [PATCH 109/609] drbd: Code de-duplication; new function apply_mask_val() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 02516ed9127..0100aab1288 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -69,17 +69,24 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); } +static union drbd_state +apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val) +{ + union drbd_state ns; 
+ ns.i = (os.i & ~mask.i) | val.i; + return ns; +} + enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, union drbd_state mask, union drbd_state val) { unsigned long flags; - union drbd_state os, ns; + union drbd_state ns; enum drbd_state_rv rv; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; + ns = apply_mask_val(mdev->state, mask, val); rv = _drbd_set_state(mdev, ns, f, NULL); ns = mdev->state; spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); @@ -115,8 +122,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, ns, NULL); + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ @@ -162,8 +168,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, ns, NULL); + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv < SS_SUCCESS) goto abort; @@ -199,8 +204,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; + ns = apply_mask_val(mdev->state, mask, val); rv = _drbd_set_state(mdev, ns, f, &done); drbd_state_unlock(mdev); } else { From bbeb641c3e4982d6bba21188545a7fd44ab0a715 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 13:45:46 +0100 Subject: [PATCH 110/609] drbd: Killed volume0; last step of multi-volume-enablement Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 8 +- 
drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 39 ++-- drivers/block/drbd/drbd_state.c | 294 +++++++++++++++++++++++------ drivers/block/drbd/drbd_state.h | 10 + drivers/block/drbd/drbd_worker.c | 7 +- 7 files changed, 277 insertions(+), 87 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e2b59f58a0a..f718124c5c8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -918,8 +918,8 @@ enum { struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ - struct drbd_conf *volume0; /* TODO: Remove me again */ struct idr volumes; /* to mdev mapping */ + enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ @@ -2024,7 +2024,7 @@ static inline int get_net_conf(struct drbd_tconn *tconn) int have_net_conf; atomic_inc(&tconn->net_cnt); - have_net_conf = tconn->volume0->state.conn >= C_UNCONNECTED; + have_net_conf = tconn->cstate >= C_UNCONNECTED; if (!have_net_conf) put_net_conf(tconn); return have_net_conf; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b43ad87a536..b64b7388ee9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1344,7 +1344,7 @@ static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket drop_it = tconn->meta.socket == sock || !tconn->asender.task || get_t_state(&tconn->asender) != RUNNING - || tconn->volume0->state.conn < C_CONNECTED; + || tconn->cstate < C_WF_REPORT_PARAMS; if (drop_it) return true; @@ -1705,9 +1705,9 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, conn_err(tconn, "%s_sendmsg returned %d\n", sock == tconn->meta.socket ? 
"msock" : "sock", rv); - drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); + conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); } else - drbd_force_state(tconn->volume0, NS(conn, C_TIMEOUT)); + conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD); } return sent; @@ -2188,6 +2188,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + tconn->cstate = C_STANDALONE; spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); @@ -2258,7 +2259,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; - mdev->tconn->volume0 = mdev; mdev->minor = minor; drbd_init_set_defaults(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0debe589b67..eeb284aef3c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1547,7 +1547,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->tconn->int_dig_out=int_dig_out; mdev->tconn->int_dig_in=int_dig_in; mdev->tconn->int_dig_vv=int_dig_vv; - retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL); + retcode = _conn_request_state(mdev->tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); spin_unlock_irq(&mdev->tconn->req_lock); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2b69a15a55d..27e1eb7ce54 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -551,7 +551,7 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) set_fs(oldfs); if (rv != size) - drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); + conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); return rv; } @@ -647,7 +647,7 @@ out: conn_err(tconn, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) - 
drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } put_net_conf(tconn); return sock; @@ -694,7 +694,7 @@ out: if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { conn_err(tconn, "%s failed, err = %d\n", what, err); - drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } put_net_conf(tconn); @@ -776,7 +776,7 @@ static int drbd_connect(struct drbd_tconn *tconn) struct socket *s, *sock, *msock; int try, h, ok; - if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) + if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; clear_bit(DISCARD_CONCURRENT, &tconn->flags); @@ -850,7 +850,7 @@ retry: } } - if (tconn->volume0->state.conn <= C_DISCONNECTING) + if (tconn->cstate <= C_DISCONNECTING) goto out_release_sockets; if (signal_pending(current)) { flush_signals(current); @@ -912,7 +912,7 @@ retry: } } - if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) + if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) return 0; sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; @@ -3817,7 +3817,7 @@ static void drbdd(struct drbd_tconn *tconn) if (0) { err_out: - drbd_force_state(tconn->volume0, NS(conn, C_PROTOCOL_ERROR)); + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } } @@ -3834,10 +3834,10 @@ void drbd_flush_workqueue(struct drbd_conf *mdev) static void drbd_disconnect(struct drbd_tconn *tconn) { - union drbd_state os, ns; + enum drbd_conns oc; int rv = SS_UNKNOWN_ERROR; - if (tconn->volume0->state.conn == C_STANDALONE) + if (tconn->cstate == C_STANDALONE) return; /* asender does not clean up anything. 
it must not interfere, either */ @@ -3849,16 +3849,13 @@ static void drbd_disconnect(struct drbd_tconn *tconn) conn_info(tconn, "Connection closed\n"); spin_lock_irq(&tconn->req_lock); - os = tconn->volume0->state; - if (os.conn >= C_UNCONNECTED) { - /* Do not restart in case we are C_DISCONNECTING */ - ns.i = os.i; - ns.conn = C_UNCONNECTED; - rv = _drbd_set_state(tconn->volume0, ns, CS_VERBOSE, NULL); - } + oc = tconn->cstate; + if (oc >= C_UNCONNECTED) + rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); - if (os.conn == C_DISCONNECTING) { + if (oc == C_DISCONNECTING) { wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0); crypto_free_hash(tconn->cram_hmac_tfm); @@ -3866,7 +3863,7 @@ static void drbd_disconnect(struct drbd_tconn *tconn) kfree(tconn->net_conf); tconn->net_conf = NULL; - drbd_request_state(tconn->volume0, NS(conn, C_STANDALONE)); + conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE); } } @@ -4240,7 +4237,7 @@ int drbdd_init(struct drbd_thread *thi) } if (h == -1) { conn_warn(tconn, "Discarding network configuration.\n"); - drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } while (h == 0); @@ -4709,11 +4706,11 @@ int drbd_asender(struct drbd_thread *thi) if (0) { reconnect: - drbd_force_state(tconn->volume0, NS(conn, C_NETWORK_FAILURE)); + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); } if (0) { disconnect: - drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } clear_bit(SIGNAL_ASENDER, &tconn->flags); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0100aab1288..7376d9dc0bc 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -43,8 +43,7 @@ int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); static int 
w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags); +static void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); @@ -275,6 +274,51 @@ void print_st_err(struct drbd_conf *mdev, union drbd_state os, print_st(mdev, "wanted", ns); } +static void print_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + if (ns.role != os.role) + pbp += sprintf(pbp, "role( %s -> %s ) ", + drbd_role_str(os.role), + drbd_role_str(ns.role)); + if (ns.peer != os.peer) + pbp += sprintf(pbp, "peer( %s -> %s ) ", + drbd_role_str(os.peer), + drbd_role_str(ns.peer)); + if (ns.conn != os.conn && !(flags & CS_NO_CSTATE_CHG)) + pbp += sprintf(pbp, "conn( %s -> %s ) ", + drbd_conn_str(os.conn), + drbd_conn_str(ns.conn)); + if (ns.disk != os.disk) + pbp += sprintf(pbp, "disk( %s -> %s ) ", + drbd_disk_str(os.disk), + drbd_disk_str(ns.disk)); + if (ns.pdsk != os.pdsk) + pbp += sprintf(pbp, "pdsk( %s -> %s ) ", + drbd_disk_str(os.pdsk), + drbd_disk_str(ns.pdsk)); + if (is_susp(ns) != is_susp(os)) + pbp += sprintf(pbp, "susp( %d -> %d ) ", + is_susp(os), + is_susp(ns)); + if (ns.aftr_isp != os.aftr_isp) + pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", + os.aftr_isp, + ns.aftr_isp); + if (ns.peer_isp != os.peer_isp) + pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", + os.peer_isp, + ns.peer_isp); + if (ns.user_isp != os.user_isp) + pbp += sprintf(pbp, "user_isp( %d -> %d ) ", + 
os.user_isp, + ns.user_isp); + if (pbp != pb) + dev_info(DEV, "%s\n", pb); +} /** * is_valid_state() - Returns an SS_ error code if ns is not valid @@ -704,48 +748,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (warn_sync_abort) dev_warn(DEV, "%s aborted.\n", warn_sync_abort); - { - char *pbp, pb[300]; - pbp = pb; - *pbp = 0; - if (ns.role != os.role) - pbp += sprintf(pbp, "role( %s -> %s ) ", - drbd_role_str(os.role), - drbd_role_str(ns.role)); - if (ns.peer != os.peer) - pbp += sprintf(pbp, "peer( %s -> %s ) ", - drbd_role_str(os.peer), - drbd_role_str(ns.peer)); - if (ns.conn != os.conn) - pbp += sprintf(pbp, "conn( %s -> %s ) ", - drbd_conn_str(os.conn), - drbd_conn_str(ns.conn)); - if (ns.disk != os.disk) - pbp += sprintf(pbp, "disk( %s -> %s ) ", - drbd_disk_str(os.disk), - drbd_disk_str(ns.disk)); - if (ns.pdsk != os.pdsk) - pbp += sprintf(pbp, "pdsk( %s -> %s ) ", - drbd_disk_str(os.pdsk), - drbd_disk_str(ns.pdsk)); - if (is_susp(ns) != is_susp(os)) - pbp += sprintf(pbp, "susp( %d -> %d ) ", - is_susp(os), - is_susp(ns)); - if (ns.aftr_isp != os.aftr_isp) - pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", - os.aftr_isp, - ns.aftr_isp); - if (ns.peer_isp != os.peer_isp) - pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", - os.peer_isp, - ns.peer_isp); - if (ns.user_isp != os.user_isp) - pbp += sprintf(pbp, "user_isp( %d -> %d ) ", - os.user_isp, - ns.user_isp); - dev_info(DEV, "%s\n", pb); - } + print_state_change(mdev, os, ns, flags); /* solve the race between becoming unconfigured, * worker doing the cleanup, and @@ -887,7 +890,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ascw->done = done; drbd_queue_work(&mdev->tconn->data.work, &ascw->w); } else { - dev_warn(DEV, "Could not kmalloc an ascw\n"); + dev_err(DEV, "Could not kmalloc an ascw\n"); } return rv; @@ -1239,21 +1242,202 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); } - after_conn_state_ch(mdev->tconn, os, ns, 
flags); + after_all_state_ch(mdev->tconn, ns); + drbd_md_sync(mdev); } -static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags) -{ - /* Upon network configuration, we need to start the receiver */ - if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&tconn->receiver); +struct after_conn_state_chg_work { + struct drbd_work w; + enum drbd_conns oc; + union drbd_state nms; /* new, max state, over all mdevs */ + enum chg_state_flags flags; +}; - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY) { +static void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns) +{ + if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */ drbd_thread_stop_nowait(&tconn->worker); } } + +static int w_after_conn_state_ch(struct drbd_work *w, int unused) +{ + struct after_conn_state_chg_work *acscw = + container_of(w, struct after_conn_state_chg_work, w); + struct drbd_tconn *tconn = w->tconn; + enum drbd_conns oc = acscw->oc; + union drbd_state nms = acscw->nms; + + kfree(acscw); + + /* Upon network configuration, we need to start the receiver */ + if (oc == C_STANDALONE && nms.conn == C_UNCONNECTED) + drbd_thread_start(&tconn->receiver); + + //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); + after_all_state_ch(tconn, nms); + + return 1; +} + +static void print_conn_state_change(struct drbd_tconn *tconn, enum drbd_conns oc, enum drbd_conns nc) +{ + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + if (nc != oc) + pbp += sprintf(pbp, "conn( %s -> %s ) ", + drbd_conn_str(oc), + drbd_conn_str(nc)); + + conn_info(tconn, "%s\n", pb); +} + +struct _is_valid_itr_params { + enum chg_state_flags flags; + union drbd_state mask, val; + union drbd_state ms; /* maximal state, over all mdevs */ + enum drbd_conns oc; + enum { + OC_UNINITIALIZED, + 
OC_CONSISTENT, + OC_INCONSISTENT, + } oc_state; +}; + +static int _is_valid_itr_fn(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + struct _is_valid_itr_params *params = (struct _is_valid_itr_params *)data; + enum chg_state_flags flags = params->flags; + union drbd_state ns, os; + enum drbd_state_rv rv; + + os = mdev->state; + ns = apply_mask_val(os, params->mask, params->val); + ns = sanitize_state(mdev, ns, NULL); + rv = is_valid_state(mdev, ns); + + if (rv < SS_SUCCESS) { + /* If the old state was illegal as well, then let this happen...*/ + + if (is_valid_state(mdev, os) == rv) + rv = is_valid_soft_transition(os, ns); + } else + rv = is_valid_soft_transition(os, ns); + + switch (params->oc_state) { + case OC_UNINITIALIZED: + params->oc = os.conn; + params->oc_state = OC_CONSISTENT; + break; + case OC_CONSISTENT: + if (params->oc != os.conn) + params->oc_state = OC_INCONSISTENT; + break; + case OC_INCONSISTENT: + break; + } + + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + return rv; + } else + return 0; +} + +static int _set_state_itr_fn(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + struct _is_valid_itr_params *params = (struct _is_valid_itr_params *)data; + enum chg_state_flags flags = params->flags; + union drbd_state os, ns, ms = params->ms; + enum drbd_state_rv rv; + + os = mdev->state; + ns = apply_mask_val(os, params->mask, params->val); + ns = sanitize_state(mdev, ns, NULL); + + rv = __drbd_set_state(mdev, ns, flags, NULL); + + ms.role = max_t(enum drbd_role, mdev->state.role, ms.role); + ms.peer = max_t(enum drbd_role, mdev->state.peer, ms.peer); + ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk); + ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk); + params->ms = ms; + + return 0; +} + +enum drbd_state_rv +_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags 
flags) +{ + enum drbd_state_rv rv = SS_SUCCESS; + struct _is_valid_itr_params params; + struct after_conn_state_chg_work *acscw; + enum drbd_conns oc = tconn->cstate; + + read_lock(&global_state_lock); + + rv = is_valid_conn_transition(oc, val.conn); + if (rv < SS_SUCCESS) + goto abort; + + params.flags = flags; + params.mask = mask; + params.val = val; + params.oc_state = OC_UNINITIALIZED; + + if (!(flags & CS_HARD)) + rv = idr_for_each(&tconn->volumes, _is_valid_itr_fn, &params); + + if (rv == 0) /* idr_for_each semantics */ + rv = SS_SUCCESS; + + if (rv < SS_SUCCESS) + goto abort; + + if (params.oc_state == OC_CONSISTENT) { + oc = params.oc; + print_conn_state_change(tconn, oc, val.conn); + params.flags |= CS_NO_CSTATE_CHG; + } + tconn->cstate = val.conn; + params.ms.i = 0; + params.ms.conn = val.conn; + idr_for_each(&tconn->volumes, _set_state_itr_fn, &params); + + acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); + if (acscw) { + acscw->oc = oc; + acscw->nms = params.ms; + acscw->flags = flags; + acscw->w.cb = w_after_conn_state_ch; + acscw->w.tconn = tconn; + drbd_queue_work(&tconn->data.work, &acscw->w); + } else { + conn_err(tconn, "Could not kmalloc an acscw\n"); + } + +abort: + read_unlock(&global_state_lock); + + return rv; +} + +enum drbd_state_rv +conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags) +{ + enum drbd_state_rv rv; + + spin_lock_irq(&tconn->req_lock); + rv = _conn_request_state(tconn, mask, val, flags); + spin_unlock_irq(&tconn->req_lock); + + return rv; +} diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 3ec26e2c4c4..d312d84b841 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -2,6 +2,7 @@ #define DRBD_STATE_H struct drbd_conf; +struct drbd_tconn; /** * DOC: DRBD State macros @@ -61,6 +62,7 @@ enum chg_state_flags { CS_WAIT_COMPLETE = 4, CS_SERIALIZE = 8, CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, + 
CS_NO_CSTATE_CHG = 16, /* Do not display changes in cstate. Internal to drbd_state.c */ }; extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, @@ -79,6 +81,14 @@ extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, extern void print_st_err(struct drbd_conf *, union drbd_state, union drbd_state, int); +enum drbd_state_rv +_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags); + +enum drbd_state_rv +conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags); + extern void drbd_resume_al(struct drbd_conf *mdev); /** diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 8539df25bc2..eee017dd6d7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1720,11 +1720,10 @@ int drbd_worker(struct drbd_thread *thi) list_del_init(&w->list); spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(w, tconn->volume0->state.conn < C_CONNECTED)) { + if (!w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) { /* dev_warn(DEV, "worker: a callback failed! \n"); */ - if (tconn->volume0->state.conn >= C_CONNECTED) - drbd_force_state(tconn->volume0, - NS(conn, C_NETWORK_FAILURE)); + if (tconn->cstate >= C_WF_REPORT_PARAMS) + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); } } From dad20554812e73a2bfbe45d1b161d5d3c249e597 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Feb 2011 19:43:55 +0100 Subject: [PATCH 111/609] drbd: Removed drbd_state_lock() and drbd_state_unlock() The lock they constructed is only taken when the state_mutex was already taken. It is superfluous. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 18 ------------------ drivers/block/drbd/drbd_receiver.c | 5 +++-- drivers/block/drbd/drbd_state.c | 4 ---- drivers/block/drbd/drbd_worker.c | 10 +++++----- 4 files changed, 8 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f718124c5c8..2dbcd13ba2a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -764,7 +764,6 @@ enum { UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ - CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ CL_ST_CHG_SUCCESS, CL_ST_CHG_FAIL, CRASHED_PRIMARY, /* This node was a crashed primary. @@ -1664,23 +1663,6 @@ static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) return 0; } - - - - - -static inline void drbd_state_lock(struct drbd_conf *mdev) -{ - wait_event(mdev->misc_wait, - !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); -} - -static inline void drbd_state_unlock(struct drbd_conf *mdev) -{ - clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); - wake_up(&mdev->misc_wait); -} - static inline enum drbd_state_rv _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, enum chg_state_flags flags, struct completion *done) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 27e1eb7ce54..423e4dd2d53 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3167,7 +3167,8 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the new disk state... 
*/ - wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); + mutex_lock(&mdev->state_mutex); + mutex_unlock(&mdev->state_mutex); if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); @@ -3218,7 +3219,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, val.i = be32_to_cpu(p->val); if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && - test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + mutex_is_locked(&mdev->state_mutex)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return true; } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 7376d9dc0bc..91433168e1d 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -184,9 +184,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } - drbd_state_lock(mdev); if (!drbd_send_state_req(mdev, mask, val)) { - drbd_state_unlock(mdev); rv = SS_CW_FAILED_BY_PEER; if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); @@ -197,7 +195,6 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, (rv = _req_st_cond(mdev, mask, val))); if (rv < SS_SUCCESS) { - drbd_state_unlock(mdev); if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); goto abort; @@ -205,7 +202,6 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); ns = apply_mask_val(mdev->state, mask, val); rv = _drbd_set_state(mdev, ns, f, &done); - drbd_state_unlock(mdev); } else { rv = _drbd_set_state(mdev, ns, f, &done); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index eee017dd6d7..e8448712b95 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1536,21 +1536,21 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } if (current == mdev->tconn->worker.task) { - /* The worker should not sleep waiting for 
drbd_state_lock(), + /* The worker should not sleep waiting for state_mutex, that can take long */ - if (test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + if (!mutex_trylock(&mdev->state_mutex)) { set_bit(B_RS_H_DONE, &mdev->flags); mdev->start_resync_timer.expires = jiffies + HZ/5; add_timer(&mdev->start_resync_timer); return; } } else { - drbd_state_lock(mdev); + mutex_lock(&mdev->state_mutex); } clear_bit(B_RS_H_DONE, &mdev->flags); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { - drbd_state_unlock(mdev); + mutex_unlock(&mdev->state_mutex); return; } @@ -1639,7 +1639,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_md_sync(mdev); } put_ldev(mdev); - drbd_state_unlock(mdev); + mutex_unlock(&mdev->state_mutex); } static int _worker_dying(int vnr, void *p, void *data) From 8410da8f0e3ff5c97bce1b10627316be509ce476 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Feb 2011 20:11:10 +0100 Subject: [PATCH 112/609] drbd: Introduced tconn->cstate_mutex In compatibility mode with old DRBDs, use that as the state_mutex as well. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++++--- drivers/block/drbd/drbd_main.c | 4 +++- drivers/block/drbd/drbd_nl.c | 8 ++++---- drivers/block/drbd/drbd_receiver.c | 11 ++++++++--- drivers/block/drbd/drbd_state.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 8 ++++---- 6 files changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2dbcd13ba2a..152d07bcfb9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -917,8 +917,9 @@ enum { struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ - struct idr volumes; /* to mdev mapping */ - enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ + struct idr volumes; /* to mdev mapping */ + enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ + struct mutex cstate_mutex; /* Protects graceful disconnects */ unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ @@ -1080,7 +1081,8 @@ struct drbd_conf { unsigned long comm_bm_set; /* communicated number of set bits. */ struct bm_io_work bm_io_work; u64 ed_uuid; /* UUID of the exposed data */ - struct mutex state_mutex; + struct mutex own_state_mutex; + struct mutex *state_mutex; /* either own_state_mutex or mdev->tconn->cstate_mutex */ char congestion_reason; /* Why we where congested... 
*/ atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */ atomic_t rs_sect_ev; /* for submitted resync data rate, both */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b64b7388ee9..1781d0ad35e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1801,7 +1801,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->ap_in_flight, 0); mutex_init(&mdev->md_io_mutex); - mutex_init(&mdev->state_mutex); + mutex_init(&mdev->own_state_mutex); + mdev->state_mutex = &mdev->own_state_mutex; spin_lock_init(&mdev->al_lock); spin_lock_init(&mdev->peer_seq_lock); @@ -2189,6 +2190,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) goto fail; tconn->cstate = C_STANDALONE; + mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index eeb284aef3c..3d8e63190dc 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -320,7 +320,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (new_role == R_PRIMARY) request_ping(mdev->tconn); /* Detect a dead peer ASAP */ - mutex_lock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); mask.i = 0; mask.role = R_MASK; val.i = 0; val.role = new_role; @@ -439,7 +439,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); return rv; } @@ -2162,7 +2162,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl return 0; } - mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ + mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. 
*/ if (!get_ldev(mdev)) { retcode = ERR_NO_DISK; @@ -2204,7 +2204,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl out_dec: put_ldev(mdev); out: - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); reply->ret_code = retcode; return 0; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 423e4dd2d53..94c050ad55b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -753,6 +753,10 @@ static int drbd_connected(int vnr, void *p, void *data) atomic_set(&mdev->packet_seq, 0); mdev->peer_seq = 0; + mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ? + &mdev->tconn->cstate_mutex : + &mdev->own_state_mutex; + ok &= drbd_send_sync_param(mdev, &mdev->sync_conf); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); @@ -760,6 +764,7 @@ static int drbd_connected(int vnr, void *p, void *data) clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); + return !ok; } @@ -3167,8 +3172,8 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the new disk state... 
*/ - mutex_lock(&mdev->state_mutex); - mutex_unlock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); + mutex_unlock(mdev->state_mutex); if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); @@ -3219,7 +3224,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, val.i = be32_to_cpu(p->val); if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && - mutex_is_locked(&mdev->state_mutex)) { + mutex_is_locked(mdev->state_mutex)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return true; } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 91433168e1d..2cd4fcef554 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -163,7 +163,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, init_completion(&done); if (f & CS_SERIALIZE) - mutex_lock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; @@ -215,7 +215,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, abort: if (f & CS_SERIALIZE) - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); return rv; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e8448712b95..9a9a00eabe0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1538,19 +1538,19 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) if (current == mdev->tconn->worker.task) { /* The worker should not sleep waiting for state_mutex, that can take long */ - if (!mutex_trylock(&mdev->state_mutex)) { + if (!mutex_trylock(mdev->state_mutex)) { set_bit(B_RS_H_DONE, &mdev->flags); mdev->start_resync_timer.expires = jiffies + HZ/5; add_timer(&mdev->start_resync_timer); return; } } else { - mutex_lock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); } clear_bit(B_RS_H_DONE, 
&mdev->flags); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); return; } @@ -1639,7 +1639,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_md_sync(mdev); } put_ldev(mdev); - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); } static int _worker_dying(int vnr, void *p, void *data) From cf29c9d8c8eff69885ee4c8ddf5f9db4dcc5ab6e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Feb 2011 15:11:24 +0100 Subject: [PATCH 113/609] drbd: Implemented conn_send_state_req() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 19 ++++++++++++++++++- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_state.c | 2 -- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 152d07bcfb9..4e7454958b8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -221,8 +221,10 @@ enum drbd_packet { P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ + P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ + P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ - P_MAX_CMD = 0x2A, + P_MAX_CMD = 0x2c, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... 
*/ P_MAX_OPT_CMD = 0x101, @@ -1177,6 +1179,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); +extern int _conn_send_state_req(struct drbd_tconn *, int vnr, enum drbd_packet cmd, + union drbd_state, union drbd_state); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, @@ -1896,6 +1900,19 @@ static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } +static inline int drbd_send_state_req(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + return _conn_send_state_req(mdev->tconn, mdev->vnr, P_STATE_CHG_REQ, mask, val); +} + +static inline int conn_send_state_req(struct drbd_tconn *tconn, + union drbd_state mask, union drbd_state val) +{ + enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? 
P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; + return _conn_send_state_req(tconn, 0, cmd, mask, val); +} + static inline void drbd_thread_stop(struct drbd_thread *thi) { _drbd_thread_stop(thi, false, true); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1781d0ad35e..e0efc918a5e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -970,15 +970,15 @@ int drbd_send_state(struct drbd_conf *mdev) return ok; } -int drbd_send_state_req(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) +int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd, + union drbd_state mask, union drbd_state val) { struct p_req_state p; p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, &p.head, sizeof(p)); + return conn_send_cmd(tconn, vnr, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2cd4fcef554..f34e7d4c888 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,9 +37,7 @@ struct after_state_chg_work { struct completion *done; }; - extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); -int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); From 5aabf467e3933ba3fc30fd06a70517ab8a27a9bb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 20:27:54 +0100 Subject: [PATCH 114/609] drbd: Global_state_lock not necessary here... 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index f34e7d4c888..8c49ca8dea3 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1375,8 +1375,6 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; - read_lock(&global_state_lock); - rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) goto abort; @@ -1418,8 +1416,6 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ } abort: - read_unlock(&global_state_lock); - return rv; } From fc3b10a45ffd350e7638e50feae091a401c270bb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Feb 2011 11:07:59 +0100 Subject: [PATCH 115/609] drbd: Implemented receiving of P_CONN_ST_CHG_REPLY Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_receiver.c | 28 ++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4e7454958b8..4363b393a12 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -914,6 +914,8 @@ enum { SEND_PING, /* whether asender should send a ping asap */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ + CONN_WD_ST_CHG_OKAY, + CONN_WD_ST_CHG_FAIL, }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 94c050ad55b..2a1094aa35b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4265,18 +4265,29 @@ int drbdd_init(struct drbd_thread 
*thi) static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; + struct drbd_tconn *tconn = mdev->tconn; int retcode = be32_to_cpu(p->retcode); - if (retcode >= SS_SUCCESS) { - set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); - } else { - set_bit(CL_ST_CHG_FAIL, &mdev->flags); - dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", - drbd_set_st_err_str(retcode), retcode); + if (cmd == P_STATE_CHG_REPLY) { + if (retcode >= SS_SUCCESS) { + set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); + } else { + set_bit(CL_ST_CHG_FAIL, &mdev->flags); + dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); + } + wake_up(&mdev->state_wait); + } else /* conn == P_CONN_ST_CHG_REPLY */ { + if (retcode >= SS_SUCCESS) { + set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags); + } else { + set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags); + conn_err(tconn, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); + } + wake_up(&tconn->ping_wait); } - wake_up(&mdev->state_wait); - return true; } @@ -4553,6 +4564,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) From 047cd4a682b09a7bc5dd5610262405bb085f8b19 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Feb 2011 11:09:33 +0100 Subject: [PATCH 116/609] drbd: implemented receiving of P_CONN_ST_CHG_REQ Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 10 ++++++++++ drivers/block/drbd/drbd_receiver.c | 10 ++++++++-- 
drivers/block/drbd/drbd_state.h | 1 + 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4363b393a12..b287bad4767 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1219,6 +1219,7 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) extern int drbd_send_bitmap(struct drbd_conf *mdev); extern int _drbd_send_bitmap(struct drbd_conf *mdev); extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); +extern int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); void drbd_print_uuids(struct drbd_conf *mdev, const char *text); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e0efc918a5e..592f0c949fd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -990,6 +990,16 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, &p.head, sizeof(p)); } +int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) +{ + struct p_req_state_reply p; + enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? 
P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; + + p.retcode = cpu_to_be32(retcode); + + return conn_send_cmd(tconn, 0, USE_META_SOCKET, cmd, &p.head, sizeof(p)); +} + int fill_bitmap_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, struct bm_xfer_ctx *c) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2a1094aa35b..c85d290beed 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3232,9 +3232,14 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, mask = convert_state(mask); val = convert_state(val); - rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); + if (cmd == P_CONN_ST_CHG_REQ) { + rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY); + conn_send_sr_reply(mdev->tconn, rv); + } else { + rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); + drbd_send_sr_reply(mdev, rv); + } - drbd_send_sr_reply(mdev, rv); drbd_md_sync(mdev); return true; @@ -3768,6 +3773,7 @@ static struct data_cmd drbd_cmd_handler[] = { [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, + [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, /* anything missing from this table is in * the asender_tbl, see get_asender_cmd */ [P_MAX_CMD] = { 0, 0, NULL }, diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index d312d84b841..5fdbdf0be70 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -63,6 +63,7 @@ enum chg_state_flags { CS_SERIALIZE = 8, CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, CS_NO_CSTATE_CHG = 16, /* Do not display changes in cstate. 
Internal to drbd_state.c */ + CS_LOCAL_ONLY = 32, /* Do not consider a device pair wide state change */ }; extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, From df24aa45f4df43e8881c0f80d6a4e2653df7af05 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Feb 2011 11:14:44 +0100 Subject: [PATCH 117/609] drbd: Implemented connection wide state changes That is used for graceful disconnect only Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 28 +++++++-------- drivers/block/drbd/drbd_state.c | 62 +++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3d8e63190dc..d6832f8d49a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1572,6 +1572,7 @@ fail: static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { + struct drbd_tconn *tconn = mdev->tconn; int retcode; struct disconnect dc; @@ -1582,30 +1583,29 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } if (dc.force) { - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.conn >= C_WF_CONNECTION) - _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate >= C_WF_CONNECTION) + _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + spin_unlock_irq(&tconn->req_lock); goto done; } - retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); + retcode = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); if (retcode == SS_NOTHING_TO_DO) goto done; else if (retcode == SS_ALREADY_STANDALONE) goto done; else if (retcode == SS_PRIMARY_NOP) { - /* Our statche checking code wants to see the peer outdated. 
*/ - retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED)); + /* Our state checking code wants to see the peer outdated. */ + retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED), CS_VERBOSE); } else if (retcode == SS_CW_FAILED_BY_PEER) { /* The peer probably wants to see us outdated. */ - retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, - disk, D_OUTDATED), - CS_ORDERED); + retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), 0); if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); retcode = SS_SUCCESS; } } @@ -1613,8 +1613,8 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl if (retcode < SS_SUCCESS) goto fail; - if (wait_event_interruptible(mdev->state_wait, - mdev->state.conn != C_DISCONNECTING)) { + if (wait_event_interruptible(tconn->ping_wait, + tconn->cstate != C_DISCONNECTING)) { /* Do not test for mdev->state.conn == C_STANDALONE, since someone else might connect us in the mean time! 
*/ retcode = ERR_INTR; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8c49ca8dea3..d3bf8e39fa5 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1366,6 +1366,61 @@ static int _set_state_itr_fn(int vnr, void *p, void *data) return 0; } +static enum drbd_state_rv +_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) +{ + struct _is_valid_itr_params params; + enum drbd_state_rv rv; + + if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags)) + return SS_CW_SUCCESS; + + if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) + return SS_CW_FAILED_BY_PEER; + + params.flags = CS_NO_CSTATE_CHG; /* öö think */ + params.mask = mask; + params.val = val; + + spin_lock_irq(&tconn->req_lock); + rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; + + if (rv == SS_UNKNOWN_ERROR) + rv = idr_for_each(&tconn->volumes, _is_valid_itr_fn, &params); + + if (rv == 0) /* idr_for_each semantics */ + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail.
*/ + + spin_unlock_irq(&tconn->req_lock); + + return rv; +} + +static enum drbd_state_rv +conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags f) +{ + enum drbd_state_rv rv; + + spin_unlock_irq(&tconn->req_lock); + mutex_lock(&tconn->cstate_mutex); + + if (!conn_send_state_req(tconn, mask, val)) { + rv = SS_CW_FAILED_BY_PEER; + /* if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); */ + goto abort; + } + + wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); + +abort: + mutex_unlock(&tconn->cstate_mutex); + spin_lock_irq(&tconn->req_lock); + + return rv; +} + enum drbd_state_rv _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, enum chg_state_flags flags) @@ -1393,6 +1448,13 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ if (rv < SS_SUCCESS) goto abort; + if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING && + !(flags & (CS_LOCAL_ONLY | CS_HARD))) { + rv = conn_cl_wide(tconn, mask, val, flags); + if (rv < SS_SUCCESS) + goto abort; + } + if (params.oc_state == OC_CONSISTENT) { oc = params.oc; print_conn_state_change(tconn, oc, val.conn); From fbe29dec98622369c106ba72279500fb2f5aba99 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Feb 2011 16:38:35 +0100 Subject: [PATCH 118/609] drbd: Rename drbd_submit_ee -> drbd_submit_peer_request Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +++-- drivers/block/drbd/drbd_receiver.c | 13 +++++++------ drivers/block/drbd/drbd_worker.c | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b287bad4767..93c4db3ac67 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1532,8 +1532,9 @@ extern void start_resync_timer_fn(unsigned long data); /* drbd_receiver.c */ extern int 
drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); -extern int drbd_submit_ee(struct drbd_conf *, struct drbd_peer_request *, - const unsigned, const int); +extern int drbd_submit_peer_request(struct drbd_conf *, + struct drbd_peer_request *, const unsigned, + const int); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); extern struct drbd_peer_request *drbd_alloc_ee(struct drbd_conf *, u64, sector_t, unsigned int, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c85d290beed..6b00650d280 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1092,7 +1092,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) } /** - * drbd_submit_ee() + * drbd_submit_peer_request() * @mdev: DRBD device. * @peer_req: peer request * @rw: flag field, see bio->bi_rw @@ -1108,8 +1108,9 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) * on certain Xen deployments. */ /* TODO allocate from our own bio_set. 
*/ -int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, - const unsigned rw, const int fault_type) +int drbd_submit_peer_request(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req, + const unsigned rw, const int fault_type) { struct bio *bios = NULL; struct bio *bio; @@ -1496,7 +1497,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(data_size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) return true; /* don't care for the reason here */ @@ -1936,7 +1937,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_al_begin_io(mdev, peer_req->i.sector); } - if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0) + if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0) return true; /* don't care for the reason here */ @@ -2193,7 +2194,7 @@ submit: list_add_tail(&peer_req->w.list, &mdev->read_ee); spin_unlock_irq(&mdev->tconn->req_lock); - if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0) + if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0) return true; /* don't care for the reason here */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9a9a00eabe0..2da2d23344f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -365,7 +365,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) + if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) return 0; /* If it failed because of ENOMEM, retry should help. 
If it failed From fcefa62e4c26e70c70b9e8252a4bc9b9031a4182 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Feb 2011 16:46:59 +0100 Subject: [PATCH 119/609] drbd: Rename drbd_endio_{pri,sec} -> drbd_{,peer_}request_endio Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 13 +++++++------ drivers/block/drbd/drbd_req.c | 4 ++-- drivers/block/drbd/drbd_req.h | 2 +- drivers/block/drbd/drbd_worker.c | 8 ++++---- drivers/block/drbd/drbd_wrappers.h | 4 ++-- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 93c4db3ac67..93eb3a7ac71 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -661,7 +661,7 @@ struct drbd_request { /* if local IO is not allowed, will be NULL. * if local IO _is_ allowed, holds the locally submitted bio clone, * or, after local IO completion, the ERR_PTR(error). - * see drbd_endio_pri(). */ + * see drbd_request_endio(). */ struct bio *private_bio; struct drbd_interval i; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6b00650d280..1547c5106ab 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1136,7 +1136,7 @@ next_bio: bio->bi_bdev = mdev->ldev->backing_bdev; bio->bi_rw = rw; bio->bi_private = peer_req; - bio->bi_end_io = drbd_endio_sec; + bio->bi_end_io = drbd_peer_request_endio; bio->bi_next = bios; bios = bio; @@ -1572,7 +1572,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, if (get_ldev(mdev)) { /* data is submitted to disk within recv_resync_read. * corresponding put_ldev done below on error, - * or in drbd_endio_sec. */ + * or in drbd_peer_request_endio. 
*/ ok = recv_resync_read(mdev, sector, data_size); } else { if (__ratelimit(&drbd_ratelimit_state)) @@ -1760,10 +1760,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, return drbd_drain_block(mdev, data_size); } - /* get_ldev(mdev) successful. - * Corresponding put_ldev done either below (on various errors), - * or in drbd_endio_sec, if we successfully submit the data at - * the end of this function. */ + /* + * Corresponding put_ldev done either below (on various errors), or in + * drbd_peer_request_endio, if we successfully submit the data at the + * end of this function. + */ sector = be64_to_cpu(p->sector); peer_req = read_in_block(mdev, p->block_id, sector, data_size); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 45a543e5c6a..18eb3d17f17 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -116,7 +116,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const drbd_set_in_sync(mdev, req->i.sector, req->i.size); /* one might be tempted to move the drbd_al_complete_io - * to the local io completion callback drbd_endio_pri. + * to the local io completion callback drbd_request_endio. * but, if this was a mirror write, we may only * drbd_al_complete_io after this is RQ_NET_DONE, * otherwise the extent could be dropped from the al @@ -252,7 +252,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * what we need to do here is just: complete the master_bio. * * local completion error, if any, has been stored as ERR_PTR - * in private_bio within drbd_endio_pri. + * in private_bio within drbd_request_endio. 
*/ int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); int error = PTR_ERR(req->private_bio); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index e6232ce5a1c..e6f2361d6b1 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -230,7 +230,7 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi req->private_bio = bio; bio->bi_private = req; - bio->bi_end_io = drbd_endio_pri; + bio->bi_end_io = drbd_request_endio; bio->bi_next = NULL; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2da2d23344f..01ab0bc0cd9 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -45,8 +45,8 @@ static int w_make_resync_request(struct drbd_work *w, int cancel); /* endio handlers: * drbd_md_io_complete (defined here) - * drbd_endio_pri (defined here) - * drbd_endio_sec (defined here) + * drbd_request_endio (defined here) + * drbd_peer_request_endio (defined here) * bm_async_io_complete (defined in drbd_bitmap.c) * * For all these callbacks, note the following: @@ -151,7 +151,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver. 
*/ -void drbd_endio_sec(struct bio *bio, int error) +void drbd_peer_request_endio(struct bio *bio, int error) { struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_conf *mdev = peer_req->w.mdev; @@ -187,7 +187,7 @@ void drbd_endio_sec(struct bio *bio, int error) /* read, readA or write requests on R_PRIMARY coming from drbd_make_request */ -void drbd_endio_pri(struct bio *bio, int error) +void drbd_request_endio(struct bio *bio, int error) { unsigned long flags; struct drbd_request *req = bio->bi_private; diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 151f1a37478..decf9b282e8 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -20,8 +20,8 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev, /* bi_end_io handlers */ extern void drbd_md_io_complete(struct bio *bio, int error); -extern void drbd_endio_sec(struct bio *bio, int error); -extern void drbd_endio_pri(struct bio *bio, int error); +extern void drbd_peer_request_endio(struct bio *bio, int error); +extern void drbd_request_endio(struct bio *bio, int error); /* * used to submit our private bio From d0e22a260c5142171c730436664febb045b9f0f0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Feb 2011 18:11:24 +0100 Subject: [PATCH 120/609] drbd: Iterate over all overlapping intervals in a tree Add a macro and helper function for doing that. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 18 ++++++++++++++++++ drivers/block/drbd/drbd_interval.h | 16 ++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 14dbe2dd2d3..0e53f102e68 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -157,3 +157,21 @@ drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) } return overlap; } + +struct drbd_interval * +drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size) +{ + sector_t end = sector + (size >> 9); + struct rb_node *node; + + for (;;) { + node = rb_next(&i->rb); + if (!node) + return NULL; + i = rb_entry(node, struct drbd_interval, rb); + if (i->sector >= end) + return NULL; + if (sector < i->sector + (i->size >> 9)) + return i; + } +} diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index 4010ad92394..f38fcb00c10 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -23,10 +23,18 @@ static inline bool drbd_interval_empty(struct drbd_interval *i) return RB_EMPTY_NODE(&i->rb); } -bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); -bool drbd_contains_interval(struct rb_root *, sector_t, struct drbd_interval *); -void drbd_remove_interval(struct rb_root *, struct drbd_interval *); -struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, +extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); +extern bool drbd_contains_interval(struct rb_root *, sector_t, + struct drbd_interval *); +extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *); +extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, + unsigned int); +extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t, unsigned int); +#define 
drbd_for_each_overlap(i, root, sector, size) \ + for (i = drbd_find_overlap(root, sector, size); \ + i; \ + i = drbd_next_overlap(i, sector, size)) + #endif /* __DRBD_INTERVAL_H */ From 8ca9844f105acf6981751e39c1ac1a240afe5a2b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 21 Feb 2011 12:34:58 +0100 Subject: [PATCH 121/609] drbd: Remove obsolete comment Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 18eb3d17f17..157c7374340 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -667,10 +667,9 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0; if (mdev->state.disk < D_INCONSISTENT) return 0; - /* state.disk == D_INCONSISTENT We will have a look at the BitMap */ - nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; + nr_sectors = drbd_get_capacity(mdev->this_bdev); D_ASSERT(sector < nr_sectors); D_ASSERT(esector < nr_sectors); From c670a398676499913ce72c26a66d204bcbdbc2e9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 21 Feb 2011 12:41:39 +0100 Subject: [PATCH 122/609] drbd: Use the IS_ALIGNED() macro in some more places Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_req.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 0748871d6b1..ad618637172 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -767,7 +767,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, int wake_up = 0; unsigned long flags; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size 
<= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; @@ -832,7 +832,7 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, unsigned int enr, count = 0; struct lc_element *e; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "sector: %llus, size: %d\n", (unsigned long long)sector, size); return 0; @@ -1217,7 +1217,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) sector_t esector, nr_sectors; int wake_up = 0; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1547c5106ab..7540b342832 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2038,7 +2038,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); return false; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 157c7374340..48d313dcae7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1023,7 +1023,7 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) * what we "blindly" assume: */ D_ASSERT(bio->bi_size > 0); - D_ASSERT((bio->bi_size & 0x1ff) == 0); + D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); 
D_ASSERT(bio->bi_idx == 0); /* to make some things easier, force alignment of requests within the From 8c387def58351f571cfcad93a3b57dff415b40c0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 18 Feb 2011 14:13:07 +0100 Subject: [PATCH 123/609] drbd: simplify condition in drbd_may_do_local_read() fold if (x >= (N+1)) return 0; if (x < N) return 0; into if (x != N) return 0; Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 48d313dcae7..733219884ab 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -663,9 +663,7 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s if (mdev->state.disk == D_UP_TO_DATE) return 1; - if (mdev->state.disk >= D_OUTDATED) - return 0; - if (mdev->state.disk < D_INCONSISTENT) + if (mdev->state.disk != D_INCONSISTENT) return 0; esector = sector + (size >> 9) - 1; From 867f57483b1759f8cd76ec31ff1f37abde5ad577 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:53 +0100 Subject: [PATCH 124/609] drbd: fix typo in comment Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ad618637172..1ce3de6eed1 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -546,7 +546,7 @@ cancel: } /** - * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents + * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents * @mdev: DRBD device. 
*/ void drbd_al_apply_to_bm(struct drbd_conf *mdev) From 61610420f764acb835af4a450251dbab2ab6d621 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:54 +0100 Subject: [PATCH 125/609] drbd: in drbd_suspend_al, set AL_SUSPENDED before unlocking the activity log As using an empty activity log is the whole point of the exercise, make sure it is still empty when setting AL_SUSPENDED. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d6832f8d49a..ae8f42e38e4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -901,19 +901,17 @@ static void drbd_suspend_al(struct drbd_conf *mdev) { int s = 0; - if (lc_try_lock(mdev->act_log)) { - drbd_al_shrink(mdev); - lc_unlock(mdev->act_log); - } else { + if (!lc_try_lock(mdev->act_log)) { dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n"); return; } + drbd_al_shrink(mdev); spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags); - spin_unlock_irq(&mdev->tconn->req_lock); + lc_unlock(mdev->act_log); if (s) dev_info(DEV, "Suspended AL updates\n"); From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:55 +0100 Subject: [PATCH 126/609] drbd: use clear_bit_unlock() where appropriate Some open-coded clear_bit(); smp_mb__after_clear_bit(); should in fact have been smp_mb__before_clear_bit(); clear_bit(); Instead, use clear_bit_unlock() to annotate the intention, and have it do the right thing.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_main.c | 3 +-- include/linux/lru_cache.h | 3 +-- lib/lru_cache.c | 10 ++++------ 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e8d652f197c..4be73705571 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) { struct drbd_bitmap *b = mdev->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); - clear_bit(BM_PAGE_IO_LOCK, addr); - smp_mb__after_clear_bit(); + clear_bit_unlock(BM_PAGE_IO_LOCK, addr); wake_up(&mdev->bitmap->bm_io_wait); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 592f0c949fd..c77e51a4092 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) put_ldev(mdev); } - clear_bit(BITMAP_IO, &mdev->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(BITMAP_IO, &mdev->flags); wake_up(&mdev->misc_wait); if (work->done) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 7a71ffad037..4cceafb0732 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); } static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index a07e7268d7e..9f353f7f41c 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); } while (0) #define RETURN(x...) 
do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); lc->changing_element = NULL; lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); RETURN(); } @@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } From 0097f0405d365eff66235f887d47fa0b62b28599 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:57 +0100 Subject: [PATCH 127/609] lru_cache.h: fix comments referring to ts_ instead of lc_ For some time we contemplated calling the "struct lru_cache" a "struct tracked_set", and some comments kept the ts_ prefix. Fix those to match the member field names. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- lib/lru_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 9f353f7f41c..4f638b86674 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -378,7 +378,7 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) /* it was not present in the active set. * we are going to recycle an unused (or even "free") element. * user may need to commit a transaction to record that change. 
- * we serialize on flags & TF_DIRTY */ + * we serialize on flags & LC_DIRTY */ if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { ++lc->dirty; RETURN(NULL); From a9efc748d679efb39fe7a8a536dde94cee691604 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:58 +0100 Subject: [PATCH 128/609] lru_cache: consolidate lc_get and lc_try_get Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- lib/lru_cache.c | 136 ++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 67 deletions(-) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 4f638b86674..17621684758 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -308,6 +308,58 @@ static int lc_unused_element_available(struct lru_cache *lc) return 0; } +static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + if (!may_change) + RETURN(NULL); + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the active set. + * we are going to recycle an unused (or even "free") element. + * user may need to commit a transaction to record that change. 
+ * we serialize on flags & LC_DIRTY */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} /** * lc_get - get element by label, maybe change the active set @@ -348,78 +400,28 @@ static int lc_unused_element_available(struct lru_cache *lc) */ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) { - struct lc_element *e; - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } - - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - RETURN(e); - } - - ++lc->misses; - - /* In case there is nothing available and we can not kick out - * the LRU element, we have to wait ... - */ - if (!lc_unused_element_available(lc)) { - __set_bit(__LC_STARVING, &lc->flags); - RETURN(NULL); - } - - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. - * we serialize on flags & LC_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; - RETURN(NULL); - } - - e = lc_get_unused_element(lc); - BUG_ON(!e); - - clear_bit(__LC_STARVING, &lc->flags); - BUG_ON(++e->refcnt != 1); - lc->used++; - - lc->changing_element = e; - lc->new_number = enr; - - RETURN(e); + return __lc_get(lc, enr, 1); } -/* similar to lc_get, - * but only gets a new reference on an existing element. - * you either get the requested element, or NULL. - * will be consolidated into one function. 
+/** + * lc_try_get - get element by label, if present; do not change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away */ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) { - struct lc_element *e; - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } - - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - } - RETURN(e); + return __lc_get(lc, enr, 0); } /** From 45dfffebd08c1445493bfa8f0ec05b38714b9b2d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:00 +0100 Subject: [PATCH 129/609] drbd: allow to select specific bitmap pages for writeout We are about to allow several changes to the active set in one activity log transaction. We have to write out the corresponding bitmap pages as well, if changed. Introduce drbd_bm_mark_for_writeout(), then re-use the existing bitmap writeout path to submit all marked pages in one go. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 49 ++++++++++++++++++++++++++++---- drivers/block/drbd/drbd_int.h | 13 ++++++--- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4be73705571..bc89c4a30cb 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -188,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev) /* to mark for lazy writeout once syncer cleared all clearable bits, * we if bits have been cleared since last IO. 
*/ #define BM_PAGE_LAZY_WRITEOUT 28 +/* pages marked with this "HINT" will be considered for writeout + * on activity log transactions */ +#define BM_PAGE_HINT_WRITEOUT 27 /* store_page_idx uses non-atomic assignment. It is only used directly after * allocating the page. All other bm_set_page_* and bm_clear_page_* need to @@ -237,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page) set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); } +/** + * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout + * @mdev: DRBD device. + * @page_nr: the bitmap page to mark with the "hint" flag + * + * From within an activity log transaction, we mark a few pages with these + * hints, then call drbd_bm_write_hinted(), which will only write out changed + * pages which are flagged with this mark. + */ +void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr) +{ + struct page *page; + if (page_nr >= mdev->bitmap->bm_number_of_pages) { + dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n", + page_nr, (int)mdev->bitmap->bm_number_of_pages); + return; + } + page = mdev->bitmap->bm_pages[page_nr]; + set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)); +} + static int bm_test_page_unchanged(struct page *page) { volatile const unsigned long *addr = &page_private(page); @@ -897,6 +921,7 @@ struct bm_aio_ctx { struct completion done; unsigned flags; #define BM_AIO_COPY_PAGES 1 +#define BM_AIO_WRITE_HINTED 2 int error; }; @@ -1007,13 +1032,13 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must /* * bm_rw: read/write the whole bitmap from/to its on disk location. 
*/ -static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) +static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { struct bm_aio_ctx ctx = { .mdev = mdev, .in_flight = ATOMIC_INIT(1), .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, + .flags = flags, }; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; @@ -1042,6 +1067,10 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) break; if (rw & WRITE) { + if ((flags & BM_AIO_WRITE_HINTED) && + !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, + &page_private(b->bm_pages[i]))) + continue; if (bm_test_page_unchanged(b->bm_pages[i])) { dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); continue; @@ -1099,7 +1128,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id */ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, READ, 0); + return bm_rw(mdev, READ, 0, 0); } /** @@ -1110,7 +1139,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, WRITE, 0); + return bm_rw(mdev, WRITE, 0, 0); } /** @@ -1120,12 +1149,20 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) { - return bm_rw(mdev, WRITE, upper_idx); + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); } +/** + * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. + * @mdev: DRBD device. 
+ */ +int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); +} /** - * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap + * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap * @mdev: DRBD device. * @idx: bitmap page index * diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 93eb3a7ac71..edfdeb62c18 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1310,11 +1310,14 @@ struct bm_extent { #define SLEEP_TIME (HZ/10) -#define BM_BLOCK_SHIFT 12 /* 4k per bit */ +/* We do bitmap IO in units of 4k blocks. + * We also still have a hardcoded 4k per bit relation. */ +#define BM_BLOCK_SHIFT 12 /* 4k per bit */ #define BM_BLOCK_SIZE (1< Date: Mon, 21 Feb 2011 13:21:01 +0100 Subject: [PATCH 130/609] lru_cache: allow multiple changes per transaction Allow multiple changes to the active set of elements in lru_cache. The only current user of lru_cache, drbd, is driving this generalisation. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 50 ++----- drivers/block/drbd/drbd_nl.c | 4 +- include/linux/lru_cache.h | 70 +++++---- lib/lru_cache.c | 243 +++++++++++++++++++++---------- 4 files changed, 226 insertions(+), 141 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1ce3de6eed1..44097c87fed 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) { struct lc_element *al_ext; struct lc_element *tmp; - unsigned long al_flags = 0; int wake; spin_lock_irq(&mdev->al_lock); @@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) return NULL; } } - al_ext = lc_get(mdev->act_log, enr); - al_flags = mdev->act_log->flags; + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - - /* - if (!al_ext) { - if (al_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); - if (al_flags & LC_DIRTY) - dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); - } - */ - return al_ext; } @@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) mdev->al_writ_cnt++; spin_lock_irq(&mdev->al_lock); - lc_changed(mdev->act_log, al_ext); + lc_committed(mdev->act_log); spin_unlock_irq(&mdev->al_lock); wake_up(&mdev->al_wait); } @@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) struct lc_element *al_ext; int i; - D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags)); for (i = 0; i < mdev->act_log->nr_elements; i++) { al_ext = lc_element_by_index(mdev->act_log, i); @@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, } ext->rs_left = rs_left; ext->rs_failed = success ? 
0 : count; - lc_changed(mdev->resync, &ext->lce); + /* we don't keep a persistent log of the resync lru, + * we can commit any change right away. */ + lc_committed(mdev->resync); } lc_put(mdev->resync, &ext->lce); /* no race, we are within the al_lock! */ @@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wakeup = 1; } if (bm_ext->lce.refcnt == 1) @@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); } return bm_ext; @@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; - int rv = 0; + int rv; spin_lock_irq(&mdev->al_lock); - if (unlikely(enr == mdev->act_log->new_number)) - rv = 1; - else { - al_ext = lc_find(mdev->act_log, enr); - if (al_ext) { - if (al_ext->refcnt) - rv = 1; - } - } + rv = lc_is_used(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - /* - if (unlikely(rv)) { - dev_info(DEV, "Delaying sync read until app's write is done\n"); - } - */ return rv; } @@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); goto try_again; } if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wake_up(&mdev->al_wait); D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); } @@ -1082,8 +1058,6 @@ int 
drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) } check_al: for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - if (unlikely(al_enr+i == mdev->act_log->new_number)) - goto try_again; if (lc_is_used(mdev->act_log, al_enr+i)) goto try_again; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ae8f42e38e4..0a92f5226c2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, + n = lc_create("act_log", drbd_al_ext_cache, 1, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } resync_lru = lc_create("resync", drbd_bm_ext_cache, - 61, sizeof(struct bm_extent), + 1, 61, sizeof(struct bm_extent), offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 4cceafb0732..cbafae40c64 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -166,9 +166,11 @@ struct lc_element { /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ unsigned lc_number; - /* special label when on free list */ #define LC_FREE (~0U) + + /* for pending changes */ + unsigned lc_new_number; }; struct lru_cache { @@ -176,6 +178,7 @@ struct lru_cache { struct list_head lru; struct list_head free; struct list_head in_use; + struct list_head to_be_changed; /* the pre-created kmem cache to allocate the objects from */ struct kmem_cache *lc_cache; @@ -186,7 +189,7 @@ struct lru_cache { size_t element_off; /* number of elements (indices) */ - unsigned int nr_elements; + unsigned int nr_elements; /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. 
For typical use cases, * nr_elements should be a few thousand at most. @@ -194,18 +197,19 @@ struct lru_cache { * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) + /* allow to accumulate a few (index:label) changes, + * but no more than max_pending_changes */ + unsigned int max_pending_changes; + /* number of elements currently on to_be_changed list */ + unsigned int pending_changes; + /* statistics */ - unsigned used; /* number of lelements currently on in_use list */ - unsigned long hits, misses, starving, dirty, changed; + unsigned used; /* number of elements currently on in_use list */ + unsigned long hits, misses, starving, locked, changed; /* see below: flag-bits for lru_cache */ unsigned long flags; - /* when changing the label of an index element */ - unsigned int new_number; - - /* for paranoia when changing the label of an index element */ - struct lc_element *changing_element; void *lc_private; const char *name; @@ -221,10 +225,15 @@ enum { /* debugging aid, to catch concurrent access early. * user needs to guarantee exclusive access by proper locking! */ __LC_PARANOIA, - /* if we need to change the set, but currently there is a changing - * transaction pending, we are "dirty", and must deferr further - * changing requests */ + + /* annotate that the set is "dirty", possibly accumulating further + * changes, until a transaction is finally triggered */ __LC_DIRTY, + + /* Locked, no further changes allowed. + * Also used to serialize changing transactions. 
*/ + __LC_LOCKED, + /* if we need to change the set, but currently there is no free nor * unused element available, we are "starving", and must not give out * further references, to guarantee that eventually some refcnt will @@ -236,9 +245,11 @@ enum { }; #define LC_PARANOIA (1<<__LC_PARANOIA) #define LC_DIRTY (1<<__LC_DIRTY) +#define LC_LOCKED (1<<__LC_LOCKED) #define LC_STARVING (1<<__LC_STARVING) extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); @@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); +extern void lc_committed(struct lru_cache *lc); struct seq_file; extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); @@ -258,16 +269,28 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char void (*detail) (struct seq_file *, struct lc_element *)); /** - * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Allows (expects) the set to be "dirty". Note that the reference counts and + * order on the active and lru lists may still change. Used to serialize + * changing transactions. Returns true if we aquired the lock. 
+ */ +static inline int lc_try_lock_for_transaction(struct lru_cache *lc) +{ + return !test_and_set_bit(__LC_LOCKED, &lc->flags); +} + +/** + * lc_try_lock - variant to stop lc_get() from changing the tracked set * @lc: the lru cache to operate on * * Note that the reference counts and order on the active and lru lists may - * still change. Returns true if we acquired the lock. + * still change. Only works on a "clean" set. Returns true if we aquired the + * lock, which means there are no pending changes, and any further attempt to + * change the set will not succeed until the next lc_unlock(). */ -static inline int lc_try_lock(struct lru_cache *lc) -{ - return !test_and_set_bit(__LC_DIRTY, &lc->flags); -} +extern int lc_try_lock(struct lru_cache *lc); /** * lc_unlock - unlock @lc, allow lc_get() to change the set again @@ -275,14 +298,11 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit_unlock(__LC_DIRTY, &lc->flags); + clear_bit(__LC_DIRTY, &lc->flags); + clear_bit_unlock(__LC_LOCKED, &lc->flags); } -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} +extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 17621684758..d71d8949894 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); BUG_ON(i >= lc_->nr_elements); \ BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/* We need to atomically + * - try to grab the lock (set LC_LOCKED) + * - only if there is no pending transaction + * (neither LC_DIRTY nor LC_STARVING is set) + * Because of PARANOIA_ENTRY() above abusing lc->flags as well, + * it is not sufficient to just say + * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); + */ +int lc_try_lock(struct lru_cache *lc) +{ + unsigned long 
val; + do { + val = cmpxchg(&lc->flags, 0, LC_LOCKED); + } while (unlikely (val == LC_PARANOIA)); + /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ + return 0 == val; +#if 0 + /* Alternative approach, spin in case someone enters or leaves a + * PARANOIA_ENTRY()/RETURN() section. */ + unsigned long old, new, val; + do { + old = lc->flags & LC_PARANOIA; + new = old | LC_LOCKED; + val = cmpxchg(&lc->flags, old, new); + } while (unlikely (val == (old ^ LC_PARANOIA))); + return old == val; +#endif +} + /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects * @e_off: offset to the &struct lc_element member in a tracked object @@ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); * or NULL on (allocation) failure. 
*/ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off) { struct hlist_head *slot = NULL; @@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->name = name; lc->element_size = e_size; lc->element_off = e_off; lc->nr_elements = e_count; - lc->new_number = LC_FREE; + lc->max_pending_changes = max_pending_changes; lc->lc_cache = cache; lc->lc_element = element; lc->lc_slot = slot; @@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, e = p + e_off; e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); element[i] = e; } @@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->used = 0; lc->hits = 0; lc->misses = 0; lc->starving = 0; - lc->dirty = 0; + lc->locked = 0; lc->changed = 0; + lc->pending_changes = 0; lc->flags = 0; - lc->changing_element = NULL; - lc->new_number = LC_FREE; memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); for (i = 0; i < lc->nr_elements; i++) { @@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) /* re-init it */ e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); } } @@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) /* NOTE: * total calls to lc_get are * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in + * misses include "locked" count (update from an other thread in * progress) and "changed", when this in fact lead to an successful * update of the cache. 
*/ return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) @@ -224,6 +259,27 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) } +static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, + bool include_changing) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + /* "about to be changed" elements, pending transaction commit, + * are hashed by their "new number". "Normal" elements have + * lc_number == lc_new_number. */ + if (e->lc_new_number != enr) + continue; + if (e->lc_new_number == e->lc_number || include_changing) + return e; + break; + } + return NULL; +} + /** * lc_find - find element by label, if present in the hash table * @lc: The lru_cache object @@ -232,38 +288,28 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) * Returns the pointer to an element, if the element with the requested * "label" or element number is present in the hash table, * or NULL if not found. Does not change the refcnt. + * Ignores elements that are "about to be used", i.e. not yet in the active + * set, but still pending transaction commit. 
*/ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) { - struct hlist_node *n; - struct lc_element *e; - - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { - if (e->lc_number == enr) - return e; - } - return NULL; + return __lc_find(lc, enr, 0); } -/* returned element will be "recycled" immediately */ -static struct lc_element *lc_evict(struct lru_cache *lc) +/** + * lc_is_used - find element by label + * @lc: The lru_cache object + * @enr: element number + * + * Returns true, if the element with the requested "label" or element number is + * present in the hash table, and is used (refcnt > 0). + * Also finds elements that are not _currently_ used but only "about to be + * used", i.e. on the "to_be_changed" list, pending transaction commit. + */ +bool lc_is_used(struct lru_cache *lc, unsigned int enr) { - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - PARANOIA_LC_ELEMENT(lc, e); - - list_del(&e->list); - hlist_del(&e->colision); - return e; + struct lc_element *e = __lc_find(lc, enr, 1); + return e && e->refcnt; } /** @@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - e->lc_number = LC_FREE; + e->lc_number = e->lc_new_number = LC_FREE; hlist_del_init(&e->colision); list_move(&e->list, &lc->free); RETURN(); } -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) { struct list_head *n; + struct lc_element *e; - if (list_empty(&lc->free)) - return lc_evict(lc); + if (!list_empty(&lc->free)) + n = lc->free.next; + else if (!list_empty(&lc->lru)) + n = lc->lru.prev; + else + return NULL; - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); + e = list_entry(n, struct 
lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); + + e->lc_new_number = new_number; + if (!hlist_unhashed(&e->colision)) + __hlist_del(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + list_move(&e->list, &lc->to_be_changed); + + return e; } static int lc_unused_element_available(struct lru_cache *lc) @@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - e = lc_find(lc, enr); - if (e) { + e = __lc_find(lc, enr, 1); + /* if lc_new_number != lc_number, + * this enr is currently being pulled in already, + * and will be available once the pending transaction + * has been committed. */ + if (e && e->lc_new_number == e->lc_number) { ++lc->hits; if (e->refcnt++ == 0) lc->used++; @@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool if (!may_change) RETURN(NULL); + /* It has been found above, but on the "to_be_changed" list, not yet + * committed. Don't pull it in twice, wait for the transaction, then + * try again */ + if (e) + RETURN(NULL); + + /* To avoid races with lc_try_lock(), first, mark us dirty + * (using test_and_set_bit, as it implies memory barriers), ... */ + test_and_set_bit(__LC_DIRTY, &lc->flags); + + /* ... only then check if it is locked anyways. If lc_unlock clears + * the dirty bit again, that's not a problem, we will come here again. + */ + if (test_bit(__LC_LOCKED, &lc->flags)) { + ++lc->locked; + RETURN(NULL); + } + /* In case there is nothing available and we can not kick out * the LRU element, we have to wait ... */ @@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. 
- * we serialize on flags & LC_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; + /* It was not present in the active set. We are going to recycle an + * unused (or even "free") element, but we won't accumulate more than + * max_pending_changes changes. */ + if (lc->pending_changes >= lc->max_pending_changes) RETURN(NULL); - } - e = lc_get_unused_element(lc); + e = lc_prepare_for_change(lc, enr); BUG_ON(!e); clear_bit(__LC_STARVING, &lc->flags); BUG_ON(++e->refcnt != 1); lc->used++; - - lc->changing_element = e; - lc->new_number = enr; + lc->pending_changes++; RETURN(e); } @@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool * pointer to an UNUSED element with some different element number, * where that different number may also be %LC_FREE. * - * In this case, the cache is marked %LC_DIRTY (blocking further changes), - * and the returned element pointer is removed from the lru list and - * hash collision chains. The user now should do whatever housekeeping - * is necessary. - * Then he must call lc_changed(lc,element_pointer), to finish - * the change. + * In this case, the cache is marked %LC_DIRTY, + * so lc_try_lock() will no longer succeed. + * The returned element pointer is moved to the "to_be_changed" list, + * and registered with the new element number on the hash collision chains, + * so it is possible to pick it up from lc_is_used(). + * Up to "max_pending_changes" (see lc_create()) can be accumulated. + * The user now should do whatever housekeeping is necessary, + * typically serialize on lc_try_lock_for_transaction(), then call + * lc_committed(lc) and lc_unlock(), to finish the change. * * NOTE: The user needs to check the lc_number on EACH use, so he recognizes * any cache set change. 
@@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) } /** - * lc_changed - tell @lc that the change has been recorded + * lc_committed - tell @lc that pending changes have been recorded * @lc: the lru cache to operate on - * @e: the element pending label change + * + * User is expected to serialize on explicit lc_try_lock_for_transaction() + * before the transaction is started, and later needs to lc_unlock() explicitly + * as well. */ -void lc_changed(struct lru_cache *lc, struct lc_element *e) +void lc_committed(struct lru_cache *lc) { + struct lc_element *e, *tmp; + PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - PARANOIA_LC_ELEMENT(lc, e); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = LC_FREE; - clear_bit_unlock(__LC_DIRTY, &lc->flags); + list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { + /* count number of changes, not number of transactions */ + ++lc->changed; + e->lc_number = e->lc_new_number; + list_move(&e->list, &lc->in_use); + } + lc->pending_changes = 0; RETURN(); } @@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) PARANOIA_ENTRY(); PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); + BUG_ON(e->lc_number != e->lc_new_number); if (--e->refcnt == 0) { /* move it to the front of LRU. 
*/ list_move(&e->list, &lc->lru); @@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) void lc_set(struct lru_cache *lc, unsigned int enr, int index) { struct lc_element *e; + struct list_head *lh; if (index < 0 || index >= lc->nr_elements) return; e = lc_element_by_index(lc, index); - e->lc_number = enr; + BUG_ON(e->lc_number != e->lc_new_number); + BUG_ON(e->refcnt != 0); + e->lc_number = e->lc_new_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); + if (enr == LC_FREE) + lh = &lc->free; + else { + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + lh = &lc->lru; + } + list_move(&e->list, lh); } /** @@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); EXPORT_SYMBOL(lc_find); EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); -EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_committed); EXPORT_SYMBOL(lc_element_by_index); EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); +EXPORT_SYMBOL(lc_try_lock); +EXPORT_SYMBOL(lc_is_used); From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:03 +0100 Subject: [PATCH 131/609] drbd: new on-disk activity log transaction format Use a new on-disk transaction format for the activity log, which allows for multiple changes to the active set per transaction. Using 4k transaction blocks, we can now get rid of the work-around code to deal with devices not supporting 512 byte logical block size. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 409 +++++++++++++++++++------------ drivers/block/drbd/drbd_int.h | 40 ++- drivers/block/drbd/drbd_main.c | 4 - drivers/block/drbd/drbd_nl.c | 42 +--- include/linux/drbd.h | 4 + include/linux/drbd_limits.h | 8 +- 6 files changed, 300 insertions(+), 207 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 44097c87fed..ea3895de4e6 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -24,21 +24,67 @@ */ #include +#include #include +#include +#include #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial checksum in our on disk activity log. - * With that we can ensure correct operation even when the storage - * device might do a partial (last) sector write while losing power. - */ -struct __packed al_transaction { - u32 magic; - u32 tr_number; - struct __packed { - u32 pos; - u32 extent; } updates[1 + AL_EXTENTS_PT]; - u32 xor_sum; +/* all fields on disc in big endian */ +struct __packed al_transaction_on_disk { + /* don't we all like magic */ + __be32 magic; + + /* to identify the most recent transaction block + * in the on disk ring buffer */ + __be32 tr_number; + + /* checksum on the full 4k block, with this field set to 0. */ + __be32 crc32c; + + /* type of transaction, special transaction types like: + * purge-all, set-all-idle, set-all-active, ... to-be-defined */ + __be16 transaction_type; + + /* we currently allow only a few thousand extents, + * so 16bit will be enough for the slot number. */ + + /* how many updates in this transaction */ + __be16 n_updates; + + /* maximum slot number, "al-extents" in drbd.conf speak. + * Having this in each transaction should make reconfiguration + * of that parameter easier. */ + __be16 context_size; + + /* slot number the context starts with */ + __be16 context_start_slot_nr; + + /* Some reserved bytes. 
Expected usage is a 64bit counter of + * sectors-written since device creation, and other data generation tag + * supporting usage */ + __be32 __reserved[4]; + + /* --- 36 byte used --- */ + + /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes + * in one transaction, then use the remaining byte in the 4k block for + * context information. "Flexible" number of updates per transaction + * does not help, as we have to account for the case when all update + * slots are used anyways, so it would only complicate code without + * additional benefit. + */ + __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION]; + + /* but the extent number is 32bit, which at an extent size of 4 MiB + * allows to cover device sizes of up to 2**54 Byte (16 PiB) */ + __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION]; + + /* --- 420 bytes used (36 + 64*6) --- */ + + /* 4096 - 420 = 3676 = 919 * 4 */ + __be32 context[AL_CONTEXT_PER_TRANSACTION]; }; struct update_odbm_work { @@ -48,11 +94,8 @@ struct update_odbm_work { struct update_al_work { struct drbd_work w; - struct lc_element *al_ext; struct completion event; - unsigned int enr; - /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ - unsigned int old_enr; + int err; }; struct drbd_atodb_wait { @@ -107,67 +150,30 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int logical_block_size, mask, ok; - int offset = 0; + int ok; struct page *iop = mdev->md_io_page; D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); BUG_ON(!bdev->md_bdev); - logical_block_size = bdev_logical_block_size(bdev->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - /* in case logical_block_size != 512 [ s390 only? 
] */ - if (logical_block_size != MD_SECTOR_SIZE) { - mask = (logical_block_size / MD_SECTOR_SIZE) - 1; - D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); - offset = sector & mask; - sector = sector & ~mask; - iop = mdev->md_io_tmpp; - - if (rw & WRITE) { - /* these are GFP_KERNEL pages, pre-allocated - * on device initialization */ - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, - READ, logical_block_size); - - if (unlikely(!ok)) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [logical_block_size!=512]) failed!\n", - (unsigned long long)sector); - return 0; - } - - memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); - } - } + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", + current->comm, current->pid, __func__, + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); if (sector < drbd_md_first_sector(bdev) || - sector > drbd_md_last_sector(bdev)) + sector + 7 > drbd_md_last_sector(bdev)) dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); return 0; } - if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); - } - return ok; } @@ -211,20 +217,34 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) * current->bio_tail list now. * we have to delegate updates to the activity log * to the worker thread. 
*/ - init_completion(&al_work.event); - al_work.al_ext = al_ext; - al_work.enr = enr; - al_work.old_enr = al_ext->lc_number; - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); - wait_for_completion(&al_work.event); - mdev->al_writ_cnt++; + /* Serialize multiple transactions. + * This uses test_and_set_bit, memory barrier is implicit. + * Optimization potential: + * first check for transaction number > old transaction number, + * so not all waiters have to lock/unlock. */ + wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log)); - spin_lock_irq(&mdev->al_lock); - lc_committed(mdev->act_log); - spin_unlock_irq(&mdev->al_lock); + /* Double check: it may have been committed by someone else, + * while we have been waiting for the lock. */ + if (al_ext->lc_number != enr) { + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + /* FIXME + if (al_work.err) + we need an "lc_cancel" here; + */ + lc_committed(mdev->act_log); + spin_unlock_irq(&mdev->al_lock); + } + lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); } } @@ -283,95 +303,118 @@ w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); struct drbd_conf *mdev = w->mdev; - struct lc_element *updated = aw->al_ext; - const unsigned int new_enr = aw->enr; - const unsigned int evicted = aw->old_enr; - struct al_transaction *buffer; + struct al_transaction_on_disk *buffer; + struct lc_element *e; sector_t sector; - int i, n, mx; - unsigned int extent_nr; - u32 xor_sum = 0; + int i, mx; + unsigned extent_nr; + unsigned crc = 0; if (!get_ldev(mdev)) { - dev_err(DEV, - "disk is %s, cannot start al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), 
evicted, new_enr); + dev_err(DEV, "disk is %s, cannot start al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); return 1; } - /* do we have to do a bitmap write, first? - * TODO reduce maximum latency: - * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. - * For now, we must not write the transaction, - * if we cannot write out the bitmap of the evicted extent. */ - if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) - drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { dev_err(DEV, - "disk is %s, cannot write al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + "disk is %s, cannot write al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = (struct al_transaction *)page_address(mdev->md_io_page); + buffer = page_address(mdev->md_io_page); - buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); + memset(buffer, 0, sizeof(*buffer)); + buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); - n = lc_index_of(mdev->act_log, updated); + i = 0; - buffer->updates[0].pos = cpu_to_be32(n); - buffer->updates[0].extent = cpu_to_be32(new_enr); + /* Even though no one can start to change this list + * once we set the LC_LOCKED -- from drbd_al_begin_io(), + * lc_try_lock_for_transaction() --, someone may still + * be in the process of changing it. 
*/ + spin_lock_irq(&mdev->al_lock); + list_for_each_entry(e, &mdev->act_log->to_be_changed, list) { + if (i == AL_UPDATES_PER_TRANSACTION) { + i++; + break; + } + buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); + buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); + if (e->lc_number != LC_FREE) + drbd_bm_mark_for_writeout(mdev, + al_extent_to_bm_page(e->lc_number)); + i++; + } + spin_unlock_irq(&mdev->al_lock); + BUG_ON(i > AL_UPDATES_PER_TRANSACTION); - xor_sum ^= new_enr; + buffer->n_updates = cpu_to_be16(i); + for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { + buffer->update_slot_nr[i] = cpu_to_be16(-1); + buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); + } - mx = min_t(int, AL_EXTENTS_PT, + buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle); + + mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, mdev->act_log->nr_elements - mdev->al_tr_cycle); for (i = 0; i < mx; i++) { unsigned idx = mdev->al_tr_cycle + i; extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; - buffer->updates[i+1].pos = cpu_to_be32(idx); - buffer->updates[i+1].extent = cpu_to_be32(extent_nr); - xor_sum ^= extent_nr; + buffer->context[i] = cpu_to_be32(extent_nr); } - for (; i < AL_EXTENTS_PT; i++) { - buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); - buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); - xor_sum ^= LC_FREE; - } - mdev->al_tr_cycle += AL_EXTENTS_PT; + for (; i < AL_CONTEXT_PER_TRANSACTION; i++) + buffer->context[i] = cpu_to_be32(LC_FREE); + + mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; - buffer->xor_sum = cpu_to_be32(xor_sum); - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset + mdev->al_tr_pos; + + mdev->ldev->md.al_offset + + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); - if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) + crc = crc32c(0, buffer, 4096); + 
buffer->crc32c = cpu_to_be32(crc); + + if (drbd_bm_write_hinted(mdev)) + aw->err = -EIO; + /* drbd_chk_io_error done already */ + else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + aw->err = -EIO; drbd_chk_io_error(mdev, 1, true); - - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; - - D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); - mdev->al_tr_number++; + } else { + /* advance ringbuffer position and transaction counter */ + mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); + mdev->al_tr_number++; + } mutex_unlock(&mdev->md_io_mutex); - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } +/* FIXME + * reading of the activity log, + * and potentially dirtying of the affected bitmap regions, + * should be done from userland only. + * DRBD would simply always attach with an empty activity log, + * and refuse to attach to something that looks like a crashed primary. + */ + /** * drbd_al_read_tr() - Read a single transaction from the on disk activity log * @mdev: DRBD device. @@ -383,27 +426,39 @@ w_al_write_transaction(struct drbd_work *w, int unused) */ static int drbd_al_read_tr(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - struct al_transaction *b, int index) { + struct al_transaction_on_disk *b = page_address(mdev->md_io_page); sector_t sector; - int rv, i; - u32 xor_sum = 0; + u32 crc; - sector = bdev->md.md_offset + bdev->md.al_offset + index; + sector = bdev->md.md_offset + + bdev->md.al_offset + + index * (MD_BLOCK_SIZE>>9); /* Dont process error normally, * as this is done before disk is attached! 
*/ if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; - rv = (b->magic == cpu_to_be32(DRBD_MAGIC)); + if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) + return 0; - for (i = 0; i < AL_EXTENTS_PT + 1; i++) - xor_sum ^= be32_to_cpu(b->updates[i].extent); - rv &= (xor_sum == be32_to_cpu(b->xor_sum)); + if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) + return 0; - return rv; + if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) + return 0; + + if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) + return 0; + + crc = be32_to_cpu(b->crc32c); + b->crc32c = 0; + if (!expect(crc == crc32c(0, b, 4096))) + return 0; + + return 1; } /** @@ -415,7 +470,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, */ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { - struct al_transaction *buffer; + struct al_transaction_on_disk *b; int i; int rv; int mx; @@ -428,25 +483,36 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) u32 to_tnr = 0; u32 cnr; - mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); + /* Note that this is expected to be called with a newly created, + * clean and all unused activity log of the "expected size". + */ /* lock out all other meta data io for now, * and make sure the page is mapped. */ mutex_lock(&mdev->md_io_mutex); - buffer = page_address(mdev->md_io_page); + b = page_address(mdev->md_io_page); + + /* Always use the full ringbuffer space for now. + * possible optimization: read in all of it, + * then scan the in-memory pages. 
*/ + + mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* Find the valid transaction in the log */ - for (i = 0; i <= mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + for (i = 0; i < mx; i++) { + rv = drbd_al_read_tr(mdev, bdev, i); + /* invalid data in that block */ if (rv == 0) continue; + + /* IO error */ if (rv == -1) { mutex_unlock(&mdev->md_io_mutex); return 0; } - cnr = be32_to_cpu(buffer->tr_number); + cnr = be32_to_cpu(b->tr_number); if (++found_valid == 1) { from = i; to = i; @@ -454,8 +520,11 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) to_tnr = cnr; continue; } + + D_ASSERT(cnr != to_tnr); + D_ASSERT(cnr != from_tnr); if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx+1); + D_ASSERT(from_tnr - cnr + i - from == mx); from = i; from_tnr = cnr; } @@ -476,11 +545,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ i = from; while (1) { - int j, pos; - unsigned int extent_nr; - unsigned int trn; + struct lc_element *e; + unsigned j, n, slot, extent_nr; - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + rv = drbd_al_read_tr(mdev, bdev, i); if (!expect(rv != 0)) goto cancel; if (rv == -1) { @@ -488,23 +556,55 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) return 0; } - trn = be32_to_cpu(buffer->tr_number); + /* deal with different transaction types. + * not yet implemented */ + if (!expect(b->transaction_type == 0)) + goto cancel; + /* on the fly re-create/resize activity log? + * will be a special transaction type flag. */ + if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) + goto cancel; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) + goto cancel; + + /* We are the only user of the activity log right now, + * don't actually need to take that lock. 
*/ spin_lock_irq(&mdev->al_lock); - /* This loop runs backwards because in the cyclic - elements there might be an old version of the - updated element (in slot 0). So the element in slot 0 - can overwrite old versions. */ - for (j = AL_EXTENTS_PT; j >= 0; j--) { - pos = be32_to_cpu(buffer->updates[j].pos); - extent_nr = be32_to_cpu(buffer->updates[j].extent); + /* first, apply the context, ... */ + for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); + j < AL_CONTEXT_PER_TRANSACTION && + slot < mdev->act_log->nr_elements; j++, slot++) { + extent_nr = be32_to_cpu(b->context[j]); + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); + } - if (extent_nr == LC_FREE) - continue; - - lc_set(mdev->act_log, extent_nr, pos); - active_extents++; + /* ... then apply the updates, + * which override the context information. + * drbd_al_read_tr already did the rangecheck + * on n <= AL_UPDATES_PER_TRANSACTION */ + n = be16_to_cpu(b->n_updates); + for (j = 0; j < n; j++) { + slot = be16_to_cpu(b->update_slot_nr[j]); + extent_nr = be32_to_cpu(b->update_extent_nr[j]); + if (!expect(slot < mdev->act_log->nr_elements)) + break; + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); } spin_unlock_irq(&mdev->al_lock); @@ -514,15 +614,12 @@ cancel: if (i == to) break; i++; - if (i > mx) + if (i >= mx) i = 0; } mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = to; - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 
edfdeb62c18..3213808a898 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1069,7 +1069,6 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for logical_block_size != 512 */ struct mutex md_io_mutex; /* protects the md_io_buffer */ spinlock_t al_lock; wait_queue_head_t al_wait; @@ -1259,22 +1258,39 @@ extern void drbd_ldev_destroy(struct drbd_conf *mdev); * either at the end of the backing device * or on a separate meta data device. */ -#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ /* The following numbers are sectors */ -#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ -#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ -/* Allows up to about 3.8TB */ -#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) +/* Allows up to about 3.8TB, so if you want more, + * you need to use the "flexible" meta data format. 
*/ +#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ +#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ +#define MD_AL_SECTORS 64 /* = 32 kB on disk activity log ring buffer */ +#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS) -/* Since the smalles IO unit is usually 512 byte */ -#define MD_SECTOR_SHIFT 9 -#define MD_SECTOR_SIZE (1<ldev); mdev->ldev = NULL;); - if (mdev->md_io_tmpp) { - __free_page(mdev->md_io_tmpp); - mdev->md_io_tmpp = NULL; - } clear_bit(GO_DISKLESS, &mdev->flags); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0a92f5226c2..90d73172320 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -527,7 +527,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, case DRBD_MD_INDEX_FLEX_INT: bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still fixed */ - bdev->md.al_offset = -MD_AL_MAX_SIZE; + bdev->md.al_offset = -MD_AL_SECTORS; /* we need (slightly less than) ~ this much bitmap sectors: */ md_size_sect = drbd_get_capacity(bdev->backing_bdev); md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); @@ -751,8 +751,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev) unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= 7)) - mdev->sync_conf.al_extents = 127; + if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) + mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && mdev->act_log->nr_elements == mdev->sync_conf.al_extents) @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, 1, + n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -932,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp union drbd_state ns, os; enum drbd_state_rv rv; int cp_discovered = 
0; - int logical_block_size; drbd_reconfig_start(mdev); @@ -1087,25 +1086,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_set_sector_offsets(mdev, nbc); - /* allocate a second IO page if logical_block_size != 512 */ - logical_block_size = bdev_logical_block_size(nbc->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - if (logical_block_size != MD_SECTOR_SIZE) { - if (!mdev->md_io_tmpp) { - struct page *page = alloc_page(GFP_NOIO); - if (!page) - goto force_diskless_dec; - - dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", - logical_block_size, MD_SECTOR_SIZE); - dev_warn(DEV, "Workaround engaged (has performance impact).\n"); - - mdev->md_io_tmpp = page; - } - } - if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = ERR_NOMEM; @@ -1804,14 +1784,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.rate >= 1)) sc.rate = 1; - if (!expect(sc.al_extents >= 7)) - sc.al_extents = 127; /* arbitrary minimum */ -#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) - if (sc.al_extents > AL_MAX) { - dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); - sc.al_extents = AL_MAX; - } -#undef AL_MAX + + /* clip to allowed range */ + if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) + sc.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) + sc.al_extents = DRBD_AL_EXTENTS_MAX; /* to avoid spurious errors when configuring minors before configuring * the minors they depend on: if necessary, first create the minor we diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 35fc08a0a55..70a688b92c1 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -336,6 +336,10 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +/* how I came up with this magic? 
+ * base64 decode "actlog==" ;) */ +#define DRBD_AL_MAGIC 0x69cb65a2 + /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 #define DRBD_MD_INDEX_FLEX_EXT -2 diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 447c3675238..75f05af3372 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -102,10 +102,12 @@ #define DRBD_RATE_DEF 250 /* kb/second */ /* less than 7 would hit performance unnecessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 +#define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 #define DRBD_AFTER_MIN -1 From c9d963a46de87bdbc14565ed692ca3114ddbf11b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 15:10:23 +0100 Subject: [PATCH 132/609] drbd: silence some log messages on bitmap IO Summary log messages meant for global bitmap IO should not be printed for bitmap IO caused by activity log transactions. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index bc89c4a30cb..3791082979e 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1096,9 +1096,12 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w */ if (!atomic_dec_and_test(&ctx.in_flight)) wait_for_completion(&ctx.done); - dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + + /* summary for global bitmap IO */ + if (flags == 0) + dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", + rw == WRITE ? 
"WRITE" : "READ", + count, jiffies - now); if (ctx.error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); @@ -1116,8 +1119,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w } now = b->bm_set; - dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", - ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); + if (flags == 0) + dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", + ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); return err; } From 9676c760979371701ea5a6f8adb7ce8125c22c7d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 22 Feb 2011 14:02:31 +0100 Subject: [PATCH 133/609] drbd: fix a wrong likely(), updated comments Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 01ab0bc0cd9..8ee5c4f3d1c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -295,6 +295,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * crypto_hash_final(&desc, digest); } +/* MAYBE merge common code with w_e_end_ov_req */ static int w_e_send_csum(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); @@ -306,7 +307,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - if (likely((peer_req->flags & EE_WAS_ERROR) != 0)) + if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; digest_size = crypto_hash_digestsize(mdev->csums_tfm); @@ -315,7 +316,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); - /* Free e and pages before send. + /* Free peer_req and pages before send. 
* In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in @@ -1151,11 +1152,11 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) } } - /* Free e and pages before send. - * In case we block on congestion, we could otherwise run into - * some distributed deadlock, if the other side blocks on - * congestion as well, because our receiver blocks in - * drbd_pp_alloc due to pp_in_use > max_buffers. */ + /* Free peer_req and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. */ drbd_free_ee(mdev, peer_req); if (!eq) drbd_ov_oos_found(mdev, sector, size); From 8050e6d00521795d153ea20d81712321e5b46d80 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 18 Feb 2011 16:12:48 +0100 Subject: [PATCH 134/609] drbd: Use container_of() instead of casting Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7540b342832..c08a99d57c5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1455,7 +1455,8 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, * drbd_process_done_ee() by asender only */ static int e_end_resync_block(struct drbd_work *w, int unused) { - struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok; @@ -1593,7 +1594,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, */ static int 
e_end_block(struct drbd_work *w, int cancel) { - struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok = 1, pcmd; @@ -1631,7 +1633,8 @@ static int e_end_block(struct drbd_work *w, int cancel) static int e_send_discard_ack(struct drbd_work *w, int unused) { - struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; int ok = 1; From 206d3589411fbdead67a358ce9aaa20d771724df Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 26 Feb 2011 23:19:15 +0100 Subject: [PATCH 135/609] drbd: Concurrent write detection fix Commit 9b1e63e changed the concurrent write detection algorithm to only insert peer requests into write_requests tree after determining that there is no conflict. With this change, new conflicting local requests could be added while the algorithm runs, but this case was not handled correctly. Instead of making the algorithm deal with this case, switch back to adding peer requests to the write_requests tree immediately: this improves fairness. 
When a peer request is discarded, remove that request from the write_requests tree. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 41 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c08a99d57c5..c61bf121bd0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1636,16 +1636,10 @@ static int e_send_discard_ack(struct drbd_work *w, int unused) struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; - int ok = 1; + int ok; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req); - - spin_lock_irq(&mdev->tconn->req_lock); - D_ASSERT(!drbd_interval_empty(&peer_req->i)); - drbd_remove_epoch_entry_interval(mdev, peer_req); - spin_unlock_irq(&mdev->tconn->req_lock); - dec_unacked(mdev); return ok; @@ -1836,6 +1830,12 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, spin_lock_irq(&mdev->tconn->req_lock); + /* + * Inserting the peer request into the write_requests tree will + * prevent new conflicting local requests from being added. + */ + drbd_insert_interval(&mdev->write_requests, &peer_req->i); + first = 1; for (;;) { struct drbd_interval *i; @@ -1844,26 +1844,26 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - i = drbd_find_overlap(&mdev->write_requests, sector, size); - if (i) { + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + struct drbd_request *req2; + + if (i == &peer_req->i || !i->local) + continue; + /* only ALERT on first iteration, * we may be woken up early... */ if (first) - dev_alert(DEV, "%s[%u] Concurrent %s write detected!" + dev_alert(DEV, "%s[%u] Concurrent local write detected!"
" new: %llus +%u; pending: %llus +%u\n", current->comm, current->pid, - i->local ? "local" : "remote", (unsigned long long)sector, size, (unsigned long long)i->sector, i->size); - if (i->local) { - struct drbd_request *req2; - - req2 = container_of(i, struct drbd_request, i); - if (req2->rq_state & RQ_NET_PENDING) - ++have_unacked; - } + req2 = container_of(i, struct drbd_request, i); + if (req2->rq_state & RQ_NET_PENDING) + ++have_unacked; ++have_conflict; + break; } if (!have_conflict) break; @@ -1872,6 +1872,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, if (first && discard && have_unacked) { dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n", (unsigned long long)sector); + drbd_remove_epoch_entry_interval(mdev, peer_req); inc_unacked(mdev); peer_req->w.cb = e_send_discard_ack; list_add_tail(&peer_req->w.list, &mdev->done_ee); @@ -1888,6 +1889,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } if (signal_pending(current)) { + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; @@ -1906,12 +1908,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, * there must be none now. */ D_ASSERT(have_unacked == 0); } + /* FIXME: Introduce a timeout here after which we disconnect. 
*/ schedule(); spin_lock_irq(&mdev->tconn->req_lock); } finish_wait(&mdev->misc_wait, &wait); - - drbd_insert_interval(&mdev->write_requests, &peer_req->i); } list_add(&peer_req->w.list, &mdev->active_ee); From 8ccf218e9f19ecae4d115eeff686c9f1a1e5bc9e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Feb 2011 11:35:43 +0100 Subject: [PATCH 136/609] drbd: Replace atomic_add_return with atomic_inc_return Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index efedfbc0619..4718aa4e527 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1242,7 +1242,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, p.sector = sector; p.block_id = block_id; p.blksize = blksize; - p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) return false; @@ -1530,7 +1530,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 90d73172320..016858741cf 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2374,7 +2374,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - 
cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; /* not used here. */ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char *)tl - (char *)reply->tag_list); @@ -2406,7 +2406,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; /* not used here. */ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char *)tl - (char *)reply->tag_list); @@ -2485,7 +2485,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - cn_reply->seq = atomic_add_return(1,&drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; // not used here. cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char*)tl - (char*)reply->tag_list); @@ -2524,7 +2524,7 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; /* not used here. */ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char *)tl - (char *)reply->tag_list); From 71b1c1eb9c544141e743c4d14b3c576fd4c31a5a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 1 Mar 2011 15:40:43 +0100 Subject: [PATCH 137/609] drbd: Use ping-timeout when waiting for missing ack packets When the node with the discard flag resolves write conflicts in dual-primary mode, it may determine that its peer has sent ack packets on the metadata socket which did not arrive, yet. Wait for the next ack with ping-timeout instead of a hard-coded 30 seconds. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c61bf121bd0..112098bc4c8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1710,11 +1710,12 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } p_seq = mdev->peer_seq; spin_unlock(&mdev->peer_seq_lock); - timeout = schedule_timeout(30*HZ); + timeout = mdev->tconn->net_conf->ping_timeo*HZ/10; + timeout = schedule_timeout(timeout); spin_lock(&mdev->peer_seq_lock); if (timeout == 0 && p_seq == mdev->peer_seq) { ret = -ETIMEDOUT; - dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); + dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n"); break; } } From 7be8da0798f08fb9564d4f64fe4a7d6fb4fab20b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 22 Feb 2011 02:15:32 +0100 Subject: [PATCH 138/609] drbd: Improve how conflicting writes are handled The previous algorithm for dealing with overlapping concurrent writes was generating unnecessary warnings for scenarios which could be legitimate, and did not always handle partially overlapping requests correctly. Improve the algorithm as follows: * While local or remote write requests are in progress, conflicting new local write requests will be delayed (commit 82172f7). * When a conflict between a local and remote write request is detected, the node with the discard flag decides how to resolve the conflict: It will ask its peer to discard conflicting requests which are fully contained in the local request and retry requests which overlap only partially. This involves a protocol change. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 15 +- drivers/block/drbd/drbd_main.c | 35 ++- drivers/block/drbd/drbd_receiver.c | 410 ++++++++++++++++++----------- drivers/block/drbd/drbd_req.c | 75 +++--- drivers/block/drbd/drbd_req.h | 7 +- 5 files changed, 351 insertions(+), 191 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3213808a898..17e905d0582 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -200,7 +200,7 @@ enum drbd_packet { P_RECV_ACK = 0x15, /* Used in protocol B */ P_WRITE_ACK = 0x16, /* Used in protocol C */ P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ - P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */ + P_DISCARD_WRITE = 0x18, /* Used in proto C, two-primaries conflict detection */ P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */ @@ -223,8 +223,9 @@ enum drbd_packet { P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ + P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ - P_MAX_CMD = 0x2c, + P_MAX_CMD = 0x2d, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... 
*/ P_MAX_OPT_CMD = 0x101, @@ -350,7 +351,7 @@ struct p_data { * commands which share a struct: * p_block_ack: * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), - * P_DISCARD_ACK (proto C, two-primaries conflict detection) + * P_DISCARD_WRITE (proto C, two-primaries conflict detection) * p_block_req: * P_DATA_REQUEST, P_RS_DATA_REQUEST */ @@ -362,7 +363,6 @@ struct p_block_ack { u32 seq_num; } __packed; - struct p_block_req { struct p_header head; u64 sector; @@ -655,6 +655,8 @@ struct drbd_work { #include "drbd_interval.h" +extern int drbd_wait_misc(struct drbd_conf *, struct drbd_interval *); + struct drbd_request { struct drbd_work w; @@ -752,12 +754,16 @@ enum { /* This ee has a pointer to a digest instead of a block id */ __EE_HAS_DIGEST, + + /* Conflicting local requests need to be restarted after this request */ + __EE_RESTART_REQUESTS, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) +#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) /* flag bits per mdev */ enum { @@ -1478,6 +1484,7 @@ extern void drbd_free_tconn(struct drbd_tconn *tconn); extern int proc_details; /* drbd_req */ +extern int __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long); extern int drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4718aa4e527..b93c5eccd73 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3003,7 +3003,7 @@ const char *cmdname(enum drbd_packet cmd) [P_RECV_ACK] = "RecvAck", [P_WRITE_ACK] = "WriteAck", [P_RS_WRITE_ACK] = "RSWriteAck", - [P_DISCARD_ACK] = 
"DiscardAck", + [P_DISCARD_WRITE] = "DiscardWrite", [P_NEG_ACK] = "NegAck", [P_NEG_DREPLY] = "NegDReply", [P_NEG_RS_DREPLY] = "NegRSDReply", @@ -3018,6 +3018,7 @@ const char *cmdname(enum drbd_packet cmd) [P_COMPRESSED_BITMAP] = "CBitmap", [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", + [P_RETRY_WRITE] = "RetryWrite", [P_MAX_CMD] = NULL, }; @@ -3032,6 +3033,38 @@ const char *cmdname(enum drbd_packet cmd) return cmdnames[cmd]; } +/** + * drbd_wait_misc - wait for a request to make progress + * @mdev: device associated with the request + * @i: the struct drbd_interval embedded in struct drbd_request or + * struct drbd_peer_request + */ +int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i) +{ + struct net_conf *net_conf = mdev->tconn->net_conf; + DEFINE_WAIT(wait); + long timeout; + + if (!net_conf) + return -ETIMEDOUT; + timeout = MAX_SCHEDULE_TIMEOUT; + if (net_conf->ko_count) + timeout = net_conf->timeout * HZ / 10 * net_conf->ko_count; + + /* Indicate to wake up mdev->misc_wait on progress. */ + i->waiting = true; + prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&mdev->tconn->req_lock); + timeout = schedule_timeout(timeout); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->tconn->req_lock); + if (!timeout || mdev->state.conn < C_CONNECTED) + return -ETIMEDOUT; + if (signal_pending(current)) + return -ERESTARTSYS; + return 0; +} + #ifdef CONFIG_DRBD_FAULT_INJECTION /* Fault insertion support including random number generator shamelessly * stolen from kernel/rcutorture.c */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 112098bc4c8..8c82d8945cf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -415,7 +415,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) drbd_free_net_ee(mdev, peer_req); /* possible callbacks here: - * e_end_block, and e_end_resync_block, e_send_discard_ack. 
+ * e_end_block, and e_end_resync_block, e_send_discard_write. * all ignore the last argument. */ list_for_each_entry_safe(peer_req, t, &work_list, w.list) { @@ -1589,6 +1589,51 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, return ok; } +static int w_restart_write(struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; + struct bio *bio; + unsigned long start_time; + unsigned long flags; + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + if (!expect(req->rq_state & RQ_POSTPONED)) { + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + return 0; + } + bio = req->master_bio; + start_time = req->start_time; + /* Postponed requests will not have their master_bio completed! */ + __req_mod(req, DISCARD_WRITE, NULL); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + while (__drbd_make_request(mdev, bio, start_time)) + /* retry */ ; + return 1; +} + +static void restart_conflicting_writes(struct drbd_conf *mdev, + sector_t sector, int size) +{ + struct drbd_interval *i; + struct drbd_request *req; + + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + if (!i->local) + continue; + req = container_of(i, struct drbd_request, i); + if (req->rq_state & RQ_LOCAL_PENDING || + !(req->rq_state & RQ_POSTPONED)) + continue; + if (expect(list_empty(&req->w.list))) { + req->w.mdev = mdev; + req->w.cb = w_restart_write; + drbd_queue_work(&mdev->tconn->data.work, &req->w); + } + } +} + /* e_end_block() is called via drbd_process_done_ee(). 
* this means this function only runs in the asender thread */ @@ -1622,6 +1667,8 @@ static int e_end_block(struct drbd_work *w, int cancel) spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&peer_req->i)); drbd_remove_epoch_entry_interval(mdev, peer_req); + if (peer_req->flags & EE_RESTART_REQUESTS) + restart_conflicting_writes(mdev, sector, peer_req->i.size); spin_unlock_irq(&mdev->tconn->req_lock); } else D_ASSERT(drbd_interval_empty(&peer_req->i)); @@ -1631,20 +1678,32 @@ static int e_end_block(struct drbd_work *w, int cancel) return ok; } -static int e_send_discard_ack(struct drbd_work *w, int unused) +static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) { + struct drbd_conf *mdev = w->mdev; struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; int ok; - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req); + ok = drbd_send_ack(mdev, ack, peer_req); dec_unacked(mdev); return ok; } +static int e_send_discard_write(struct drbd_work *w, int unused) +{ + return e_send_ack(w, P_DISCARD_WRITE); +} + +static int e_send_retry_write(struct drbd_work *w, int unused) +{ + struct drbd_tconn *tconn = w->mdev->tconn; + + return e_send_ack(w, tconn->agreed_pro_version >= 100 ? + P_RETRY_WRITE : P_DISCARD_WRITE); +} + static bool seq_greater(u32 a, u32 b) { /* @@ -1660,16 +1719,31 @@ static u32 seq_max(u32 a, u32 b) return seq_greater(a, b) ? a : b; } +static bool need_peer_seq(struct drbd_conf *mdev) +{ + struct drbd_tconn *tconn = mdev->tconn; + + /* + * We only need to keep track of the last packet_seq number of our peer + * if we are in dual-primary mode and we have the discard flag set; see + * handle_write_conflicts(). 
+ */ + return tconn->net_conf->two_primaries && + test_bit(DISCARD_CONCURRENT, &tconn->flags); +} + static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) { unsigned int old_peer_seq; - spin_lock(&mdev->peer_seq_lock); - old_peer_seq = mdev->peer_seq; - mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); - spin_unlock(&mdev->peer_seq_lock); - if (old_peer_seq != peer_seq) - wake_up(&mdev->seq_wait); + if (need_peer_seq(mdev)) { + spin_lock(&mdev->peer_seq_lock); + old_peer_seq = mdev->peer_seq; + mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + spin_unlock(&mdev->peer_seq_lock); + if (old_peer_seq != peer_seq) + wake_up(&mdev->seq_wait); + } } /* Called from receive_Data. @@ -1693,36 +1767,39 @@ static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) * * returns 0 if we may process the packet, * -ERESTARTSYS if we were interrupted (by disconnect signal). */ -static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) +static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq) { DEFINE_WAIT(wait); - unsigned int p_seq; long timeout; - int ret = 0; + int ret; + + if (!need_peer_seq(mdev)) + return 0; + spin_lock(&mdev->peer_seq_lock); for (;;) { - prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); - if (!seq_greater(packet_seq, mdev->peer_seq + 1)) + if (!seq_greater(peer_seq - 1, mdev->peer_seq)) { + mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + ret = 0; break; + } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } - p_seq = mdev->peer_seq; + prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); spin_unlock(&mdev->peer_seq_lock); timeout = mdev->tconn->net_conf->ping_timeo*HZ/10; timeout = schedule_timeout(timeout); spin_lock(&mdev->peer_seq_lock); - if (timeout == 0 && p_seq == mdev->peer_seq) { + if (!timeout) { ret = -ETIMEDOUT; dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n"); break; } } - finish_wait(&mdev->seq_wait, 
&wait); - if (mdev->peer_seq+1 == packet_seq) - mdev->peer_seq++; spin_unlock(&mdev->peer_seq_lock); + finish_wait(&mdev->seq_wait, &wait); return ret; } @@ -1737,6 +1814,139 @@ static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) (dpf & DP_DISCARD ? REQ_DISCARD : 0); } +static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector, + unsigned int size) +{ + struct drbd_interval *i; + + repeat: + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + struct drbd_request *req; + struct bio_and_error m; + + if (!i->local) + continue; + req = container_of(i, struct drbd_request, i); + if (!(req->rq_state & RQ_POSTPONED)) + continue; + req->rq_state &= ~RQ_POSTPONED; + __req_mod(req, NEG_ACKED, &m); + spin_unlock_irq(&mdev->tconn->req_lock); + if (m.bio) + complete_master_bio(mdev, &m); + spin_lock_irq(&mdev->tconn->req_lock); + goto repeat; + } +} + +static int handle_write_conflicts(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req) +{ + struct drbd_tconn *tconn = mdev->tconn; + bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags); + sector_t sector = peer_req->i.sector; + const unsigned int size = peer_req->i.size; + struct drbd_interval *i; + bool equal; + int err; + + /* + * Inserting the peer request into the write_requests tree will prevent + * new conflicting local requests from being added. + */ + drbd_insert_interval(&mdev->write_requests, &peer_req->i); + + repeat: + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + if (i == &peer_req->i) + continue; + + if (!i->local) { + /* + * Our peer has sent a conflicting remote request; this + * should not happen in a two-node setup. Wait for the + * earlier peer request to complete. 
+ */ + err = drbd_wait_misc(mdev, i); + if (err) + goto out; + goto repeat; + } + + equal = i->sector == sector && i->size == size; + if (resolve_conflicts) { + /* + * If the peer request is fully contained within the + * overlapping request, it can be discarded; otherwise, + * it will be retried once all overlapping requests + * have completed. + */ + bool discard = i->sector <= sector && i->sector + + (i->size >> 9) >= sector + (size >> 9); + + if (!equal) + dev_alert(DEV, "Concurrent writes detected: " + "local=%llus +%u, remote=%llus +%u, " + "assuming %s came first\n", + (unsigned long long)i->sector, i->size, + (unsigned long long)sector, size, + discard ? "local" : "remote"); + + inc_unacked(mdev); + peer_req->w.cb = discard ? e_send_discard_write : + e_send_retry_write; + list_add_tail(&peer_req->w.list, &mdev->done_ee); + wake_asender(mdev->tconn); + + err = -ENOENT; + goto out; + } else { + struct drbd_request *req = + container_of(i, struct drbd_request, i); + + if (!equal) + dev_alert(DEV, "Concurrent writes detected: " + "local=%llus +%u, remote=%llus +%u\n", + (unsigned long long)i->sector, i->size, + (unsigned long long)sector, size); + + if (req->rq_state & RQ_LOCAL_PENDING || + !(req->rq_state & RQ_POSTPONED)) { + /* + * Wait for the node with the discard flag to + * decide if this request will be discarded or + * retried. Requests that are discarded will + * disappear from the write_requests tree. + * + * In addition, wait for the conflicting + * request to finish locally before submitting + * the conflicting peer request. + */ + err = drbd_wait_misc(mdev, &req->i); + if (err) { + _conn_request_state(mdev->tconn, + NS(conn, C_TIMEOUT), + CS_HARD); + fail_postponed_requests(mdev, sector, size); + goto out; + } + goto repeat; + } + /* + * Remember to restart the conflicting requests after + * the new peer request has completed. 
+ */ + peer_req->flags |= EE_RESTART_REQUESTS; + } + } + err = 0; + + out: + if (err) + drbd_remove_epoch_entry_interval(mdev, peer_req); + return err; +} + /* mirrored write */ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) @@ -1744,18 +1954,17 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector; struct drbd_peer_request *peer_req; struct p_data *p = &mdev->tconn->data.rbuf.data; + u32 peer_seq = be32_to_cpu(p->seq_num); int rw = WRITE; u32 dp_flags; + int err; + if (!get_ldev(mdev)) { - spin_lock(&mdev->peer_seq_lock); - if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) - mdev->peer_seq++; - spin_unlock(&mdev->peer_seq_lock); - + err = wait_for_and_update_peer_seq(mdev, peer_seq); drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); atomic_inc(&mdev->current_epoch->epoch_size); - return drbd_drain_block(mdev, data_size); + return drbd_drain_block(mdev, data_size) && err == 0; } /* @@ -1785,137 +1994,22 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, atomic_inc(&peer_req->epoch->active); spin_unlock(&mdev->epoch_lock); - /* I'm the receiver, I do hold a net_cnt reference. */ - if (!mdev->tconn->net_conf->two_primaries) { - spin_lock_irq(&mdev->tconn->req_lock); - } else { - /* don't get the req_lock yet, - * we may sleep in drbd_wait_peer_seq */ - const int size = peer_req->i.size; - const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); - DEFINE_WAIT(wait); - int first; - - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - - /* conflict detection and handling: - * 1. wait on the sequence number, - * in case this data packet overtook ACK packets. - * 2. check for conflicting write requests. - * - * Note: for two_primaries, we are protocol C, - * so there cannot be any request that is DONE - * but still on the transfer log. - * - * if no conflicting request is found: - * submit. 
- * - * if any conflicting request is found - * that has not yet been acked, - * AND I have the "discard concurrent writes" flag: - * queue (via done_ee) the P_DISCARD_ACK; OUT. - * - * if any conflicting request is found: - * block the receiver, waiting on misc_wait - * until no more conflicting requests are there, - * or we get interrupted (disconnect). - * - * we do not just write after local io completion of those - * requests, but only after req is done completely, i.e. - * we wait for the P_DISCARD_ACK to arrive! - * - * then proceed normally, i.e. submit. - */ - if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num))) + if (mdev->tconn->net_conf->two_primaries) { + err = wait_for_and_update_peer_seq(mdev, peer_seq); + if (err) goto out_interrupted; - spin_lock_irq(&mdev->tconn->req_lock); - - /* - * Inserting the peer request into the write_requests tree will - * prevent new conflicting local requests from being added. - */ - drbd_insert_interval(&mdev->write_requests, &peer_req->i); - - first = 1; - for (;;) { - struct drbd_interval *i; - int have_unacked = 0; - int have_conflict = 0; - prepare_to_wait(&mdev->misc_wait, &wait, - TASK_INTERRUPTIBLE); - - drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { - struct drbd_request *req2; - - if (i == &peer_req->i || !i->local) - continue; - - /* only ALERT on first iteration, - * we may be woken up early... */ - if (first) - dev_alert(DEV, "%s[%u] Concurrent local write detected!" - " new: %llus +%u; pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); - - req2 = container_of(i, struct drbd_request, i); - if (req2->rq_state & RQ_NET_PENDING) - ++have_unacked; - ++have_conflict; - break; - } - if (!have_conflict) - break; - - /* Discard Ack only for the _first_ iteration */ - if (first && discard && have_unacked) { - dev_alert(DEV, "Concurrent write! 
[DISCARD BY FLAG] sec=%llus\n", - (unsigned long long)sector); - drbd_remove_epoch_entry_interval(mdev, peer_req); - inc_unacked(mdev); - peer_req->w.cb = e_send_discard_ack; - list_add_tail(&peer_req->w.list, &mdev->done_ee); - - spin_unlock_irq(&mdev->tconn->req_lock); - - /* we could probably send that P_DISCARD_ACK ourselves, - * but I don't like the receiver using the msock */ - + err = handle_write_conflicts(mdev, peer_req); + if (err) { + spin_unlock_irq(&mdev->tconn->req_lock); + if (err == -ENOENT) { put_ldev(mdev); - wake_asender(mdev->tconn); - finish_wait(&mdev->misc_wait, &wait); return true; } - - if (signal_pending(current)) { - drbd_remove_epoch_entry_interval(mdev, peer_req); - spin_unlock_irq(&mdev->tconn->req_lock); - finish_wait(&mdev->misc_wait, &wait); - goto out_interrupted; - } - - /* Indicate to wake up mdev->misc_wait upon completion. */ - i->waiting = true; - - spin_unlock_irq(&mdev->tconn->req_lock); - if (first) { - first = 0; - dev_alert(DEV, "Concurrent write! [W AFTERWARDS] " - "sec=%llus\n", (unsigned long long)sector); - } else if (discard) { - /* we had none on the first iteration. - * there must be none now. */ - D_ASSERT(have_unacked == 0); - } - /* FIXME: Introduce a timeout here after which we disconnect. 
*/ - schedule(); - spin_lock_irq(&mdev->tconn->req_lock); + goto out_interrupted; } - finish_wait(&mdev->misc_wait, &wait); - } - + } else + spin_lock_irq(&mdev->tconn->req_lock); list_add(&peer_req->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->tconn->req_lock); @@ -4393,9 +4487,13 @@ static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B); what = RECV_ACKED_BY_PEER; break; - case P_DISCARD_ACK: + case P_DISCARD_WRITE: D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - what = CONFLICT_DISCARDED_BY_PEER; + what = DISCARD_WRITE; + break; + case P_RETRY_WRITE: + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); + what = POSTPONE_WRITE; break; default: D_ASSERT(0); @@ -4446,6 +4544,7 @@ static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd) sector_t sector = be64_to_cpu(p->sector); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); @@ -4567,7 +4666,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, @@ -4578,6 +4677,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RETRY_WRITE] = { sizeof(struct 
p_block_ack), got_BlockAck }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 733219884ab..aab1acdd4fa 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -225,12 +225,16 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. */ + if (s & RQ_LOCAL_PENDING) + return; + if (req->i.waiting) { + /* Retry all conflicting peer requests. */ + wake_up(&mdev->misc_wait); + } if (s & RQ_NET_QUEUED) return; if (s & RQ_NET_PENDING) return; - if (s & RQ_LOCAL_PENDING) - return; if (req->master_bio) { /* this is DATA_RECEIVED (remote read) @@ -267,7 +271,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) else root = &mdev->read_requests; drbd_remove_request_interval(root, req); - } else + } else if (!(s & RQ_POSTPONED)) D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); /* for writes we need to do some extra housekeeping */ @@ -277,8 +281,10 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* Update disk stats */ _drbd_end_io_acct(mdev, req); - m->error = ok ? 0 : (error ?: -EIO); - m->bio = req->master_bio; + if (!(s & RQ_POSTPONED)) { + m->error = ok ? 
0 : (error ?: -EIO); + m->bio = req->master_bio; + } req->master_bio = NULL; } @@ -318,7 +324,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, { struct drbd_conf *mdev = req->w.mdev; int rv = 0; - m->bio = NULL; + + if (m) + m->bio = NULL; switch (what) { default: @@ -332,7 +340,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, */ case TO_BE_SENT: /* via network */ - /* reached via drbd_make_request_common + /* reached via __drbd_make_request * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); req->rq_state |= RQ_NET_PENDING; @@ -340,7 +348,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case TO_BE_SUBMITTED: /* locally */ - /* reached via drbd_make_request_common */ + /* reached via __drbd_make_request */ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); req->rq_state |= RQ_LOCAL_PENDING; break; @@ -403,7 +411,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * no local disk, * or target area marked as invalid, * or just got an io-error. */ - /* from drbd_make_request_common + /* from __drbd_make_request * or from bio_endio during read io-error recovery */ /* so we can verify the handle in the answer packet @@ -422,7 +430,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case QUEUE_FOR_NET_WRITE: /* assert something? */ - /* from drbd_make_request_common only */ + /* from __drbd_make_request only */ /* corresponding hlist_del is in _req_may_be_done() */ drbd_insert_interval(&mdev->write_requests, &req->i); @@ -436,7 +444,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * * _req_add_to_epoch(req); this has to be after the * _maybe_start_new_epoch(req); which happened in - * drbd_make_request_common, because we now may set the bit + * __drbd_make_request, because we now may set the bit * again ourselves to close the current epoch. * * Add req to the (now) current epoch (barrier). 
*/ @@ -446,7 +454,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * hurting performance. */ set_bit(UNPLUG_REMOTE, &mdev->flags); - /* see drbd_make_request_common, + /* see __drbd_make_request, * just after it grabs the req_lock */ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); @@ -535,14 +543,10 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case WRITE_ACKED_BY_PEER_AND_SIS: req->rq_state |= RQ_NET_SIS; - case CONFLICT_DISCARDED_BY_PEER: + case DISCARD_WRITE: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered by the activity log. */ - if (what == CONFLICT_DISCARDED_BY_PEER) - dev_alert(DEV, "Got DiscardAck packet %llus +%u!" - " DRBD is not a random data generator!\n", - (unsigned long long)req->i.sector, req->i.size); req->rq_state |= RQ_NET_DONE; /* fall through */ case WRITE_ACKED_BY_PEER: @@ -569,6 +573,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done_not_susp(req, m); break; + case POSTPONE_WRITE: + /* + * If this node has already detected the write conflict, the + * worker will be waiting on misc_wait. Wake it up once this + * request has completed locally. + */ + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_POSTPONED; + _req_may_be_done_not_susp(req, m); + break; + case NEG_ACKED: /* assert something? 
*/ if (req->rq_state & RQ_NET_PENDING) { @@ -688,24 +703,19 @@ static int complete_conflicting_writes(struct drbd_conf *mdev, sector_t sector, int size) { for(;;) { - DEFINE_WAIT(wait); struct drbd_interval *i; + int err; i = drbd_find_overlap(&mdev->write_requests, sector, size); if (!i) return 0; - i->waiting = true; - prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - spin_unlock_irq(&mdev->tconn->req_lock); - schedule(); - finish_wait(&mdev->misc_wait, &wait); - spin_lock_irq(&mdev->tconn->req_lock); - if (signal_pending(current)) - return -ERESTARTSYS; + err = drbd_wait_misc(mdev, i); + if (err) + return err; } } -static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); const int size = bio->bi_size; @@ -811,7 +821,12 @@ allocate_barrier: if (rw == WRITE) { err = complete_conflicting_writes(mdev, sector, size); if (err) { + if (err != -ERESTARTSYS) + _conn_request_state(mdev->tconn, + NS(conn, C_TIMEOUT), + CS_HARD); spin_unlock_irq(&mdev->tconn->req_lock); + err = -EIO; goto fail_free_complete; } } @@ -1031,7 +1046,7 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) if (likely(s_enr == e_enr)) { inc_ap_bio(mdev, 1); - return drbd_make_request_common(mdev, bio, start_time); + return __drbd_make_request(mdev, bio, start_time); } /* can this bio be split generically? 
@@ -1069,10 +1084,10 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) D_ASSERT(e_enr == s_enr + 1); - while (drbd_make_request_common(mdev, &bp->bio1, start_time)) + while (__drbd_make_request(mdev, &bp->bio1, start_time)) inc_ap_bio(mdev, 1); - while (drbd_make_request_common(mdev, &bp->bio2, start_time)) + while (__drbd_make_request(mdev, &bp->bio2, start_time)) inc_ap_bio(mdev, 1); dec_ap_bio(mdev); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index e6f2361d6b1..0b3cd412d52 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -97,7 +97,8 @@ enum drbd_req_event { RECV_ACKED_BY_PEER, WRITE_ACKED_BY_PEER, WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ - CONFLICT_DISCARDED_BY_PEER, + DISCARD_WRITE, + POSTPONE_WRITE, NEG_ACKED, BARRIER_ACKED, /* in protocol A and B */ DATA_RECEIVED, /* (remote read) */ @@ -194,6 +195,9 @@ enum drbd_req_state_bits { /* Should call drbd_al_complete_io() for this request... */ __RQ_IN_ACT_LOG, + + /* The peer has sent a retry ACK */ + __RQ_POSTPONED, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -214,6 +218,7 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) +#define RQ_POSTPONED (1UL << __RQ_POSTPONED) /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ From 1b3bb47d526c63a845e3374d6272a67fbe20a468 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 13:28:51 +0100 Subject: [PATCH 139/609] drbd: Remove redundant check Opening a device only succeeds on a primary node, or when explicitly setting the allow_oos module parameter to allow opening the device read-only on a secondary node. There is no other way that a request can get into drbd_make_request(), so this code cannot trigger. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index aab1acdd4fa..cfa5fba5303 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -997,39 +997,12 @@ fail_and_free_req: return ret; } -/* helper function for drbd_make_request - * if we can determine just by the mdev (state) that this request will fail, - * return 1 - * otherwise return 0 - */ -static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) -{ - if (mdev->state.role != R_PRIMARY && - (!allow_oos || is_write)) { - if (__ratelimit(&drbd_ratelimit_state)) { - dev_err(DEV, "Process %s[%u] tried to %s; " - "since we are not in Primary state, " - "we cannot allow this\n", - current->comm, current->pid, - is_write ? "WRITE" : "READ"); - } - return 1; - } - - return 0; -} - int drbd_make_request(struct request_queue *q, struct bio *bio) { unsigned int s_enr, e_enr; struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; unsigned long start_time; - if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { - bio_endio(bio, -EPERM); - return 0; - } - start_time = jiffies; /* From 6e849ce88cd63efc6650a1826d18ed742eb31999 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Mar 2011 17:27:45 +0100 Subject: [PATCH 140/609] drbd: Get rid of P_MAX_CMD Instead of artificially enlarging the command decoding arrays to P_MAX_CMD entries, check if an index is within the valid range using the ARRAY_SIZE() macro. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 3 +-- drivers/block/drbd/drbd_receiver.c | 10 ++++------ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 17e905d0582..be52b58a97d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -225,7 +225,6 @@ enum drbd_packet { P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ - P_MAX_CMD = 0x2d, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAX_OPT_CMD = 0x101, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b93c5eccd73..f43752fb5b5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3019,7 +3019,6 @@ const char *cmdname(enum drbd_packet cmd) [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", [P_RETRY_WRITE] = "RetryWrite", - [P_MAX_CMD] = NULL, }; if (cmd == P_HAND_SHAKE_M) @@ -3028,7 +3027,7 @@ const char *cmdname(enum drbd_packet cmd) return "HandShakeS"; if (cmd == P_HAND_SHAKE) return "HandShake"; - if (cmd >= P_MAX_CMD) + if (cmd >= ARRAY_SIZE(cmdnames)) return "Unknown"; return cmdnames[cmd]; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8c82d8945cf..262e5d97991 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3875,9 +3875,6 @@ static struct data_cmd drbd_cmd_handler[] = { [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, - /* anything missing from this table is in - * the asender_tbl, see get_asender_cmd */ - [P_MAX_CMD] = { 0, 0, NULL }, }; /* All handler functions 
that expect a sub-header get that sub-heder in @@ -3899,7 +3896,8 @@ static void drbdd(struct drbd_tconn *tconn) if (!drbd_recv_header(tconn, &pi)) goto err_out; - if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { + if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || + !drbd_cmd_handler[pi.cmd].function)) { conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } @@ -4678,9 +4676,9 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_MAX_CMD] = { 0, NULL }, }; - if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) + + if (cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[cmd].process) return NULL; return &asender_tbl[cmd]; } From 7201b972ded6c3a4d180887b187ca5897dfe49bf Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Mar 2011 18:23:00 +0100 Subject: [PATCH 141/609] drbd: Replace get_asender_cmd() with its implementation Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 66 +++++++++++++----------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 262e5d97991..540fcbf1d1e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4648,41 +4648,6 @@ static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -struct asender_cmd { - size_t pkt_size; - int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd); -}; - -static struct asender_cmd *get_asender_cmd(int cmd) -{ - static struct asender_cmd asender_tbl[] = { - /* anything missing from this table is in - * the drbd_cmd_handler (drbd_default_handler) table, - * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header), 
got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, - [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, - [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, - [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, - [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, - [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, - [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, - [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, - [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, - [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, - [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, - }; - - if (cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[cmd].process) - return NULL; - return &asender_tbl[cmd]; -} - static int _drbd_process_done_ee(int vnr, void *p, void *data) { struct drbd_conf *mdev = (struct drbd_conf *)p; @@ -4719,6 +4684,31 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) return 0; } +struct asender_cmd { + size_t pkt_size; + int (*process)(struct drbd_conf *, enum drbd_packet); +}; + +static struct asender_cmd asender_tbl[] = { + [P_PING] = { sizeof(struct p_header), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, + 
[P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, + [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, + [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, + [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, + [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, +}; + int drbd_asender(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; @@ -4803,8 +4793,8 @@ int drbd_asender(struct drbd_thread *thi) if (received == expect && cmd == NULL) { if (!decode_header(tconn, h, &pi)) goto reconnect; - cmd = get_asender_cmd(pi.cmd); - if (unlikely(cmd == NULL)) { + cmd = &asender_tbl[pi.cmd]; + if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) { conn_err(tconn, "unknown command %d on meta (l: %d)\n", pi.cmd, pi.size); goto disconnect; @@ -4823,7 +4813,7 @@ int drbd_asender(struct drbd_thread *thi) /* the idle_timeout (ping-int) * has been restored in got_PingAck() */ - if (cmd == get_asender_cmd(P_PING_ACK)) + if (cmd == &asender_tbl[P_PING_ACK]) ping_timeout_active = 0; buf = h; From c66342d9493804ba92a3c9f48ba225a936c9736f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 14:23:53 +0100 Subject: [PATCH 142/609] drbd: Remove left-over function prototypes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 -- drivers/block/drbd/drbd_receiver.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index be52b58a97d..a346eb87d0f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1534,7 +1534,6 
@@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio * extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ -extern int w_req_cancel_conflict(struct drbd_work *, int); extern int w_read_retry_remote(struct drbd_work *, int); extern int w_e_end_data_req(struct drbd_work *, int); extern int w_e_end_rsdata_req(struct drbd_work *, int); @@ -1543,7 +1542,6 @@ extern int w_e_end_ov_reply(struct drbd_work *, int); extern int w_e_end_ov_req(struct drbd_work *, int); extern int w_ov_finished(struct drbd_work *, int); extern int w_resync_timer(struct drbd_work *, int); -extern int w_resume_next_sg(struct drbd_work *, int); extern int w_send_write_hint(struct drbd_work *, int); extern int w_send_dblock(struct drbd_work *, int); extern int w_send_barrier(struct drbd_work *, int); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 540fcbf1d1e..0c3a094f691 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4009,7 +4009,6 @@ static int drbd_disconnected(int vnr, void *p, void *data) del_timer(&mdev->request_timer); - /* make sure syncer is stopped and w_resume_next_sg queued */ del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); From 0e29d163f7ec8369b3f1fb70900d29b1c4a1dc8b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 18 Feb 2011 14:23:11 +0100 Subject: [PATCH 143/609] drbd: Reworked the unconfiguring and thread stopping code * Moved CONFIG_PENDING and DEVICE_DYING from mdev to tconn. * Renamed drbd_reconfig_start() and drbd_reconfig_done() to conn_reconfig_start() and conn_reconfig_done(). 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 18 ++++++---- drivers/block/drbd/drbd_nl.c | 40 +++++++++++----------- drivers/block/drbd/drbd_receiver.c | 6 ++-- drivers/block/drbd/drbd_state.c | 54 +++++++++++++++++++----------- drivers/block/drbd/drbd_state.h | 1 + drivers/block/drbd/drbd_worker.c | 31 +++++++---------- 6 files changed, 80 insertions(+), 70 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a346eb87d0f..145ae57b311 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -787,12 +787,6 @@ enum { GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ WAS_IO_ERROR, /* Local disk failed returned IO error */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ - CONFIG_PENDING, /* serialization of (re)configuration requests. - * if set, also prevents the device from dying */ - DEVICE_DYING, /* device became unconfigured, - * but worker thread is still handling the cleanup. - * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, - * while this is set. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ @@ -921,6 +915,12 @@ enum { GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ CONN_WD_ST_CHG_OKAY, CONN_WD_ST_CHG_FAIL, + CONFIG_PENDING, /* serialization of (re)configuration requests. + * if set, also prevents the device from dying */ + OBJECT_DYING, /* device became unconfigured, + * but worker thread is still handling the cleanup. + * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, + * while this is set. 
*/ }; struct drbd_tconn { /* is a resource from the config file */ @@ -1574,7 +1574,11 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void drbd_flush_workqueue(struct drbd_conf *mdev); +extern void conn_flush_workqueue(struct drbd_tconn *tconn); +static inline void drbd_flush_workqueue(struct drbd_conf *mdev) +{ + conn_flush_workqueue(mdev->tconn); +} /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 016858741cf..8cdfb46243e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -871,29 +871,27 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) * or start a new one. Flush any pending work, there may still be an * after_state_change queued. */ -static void drbd_reconfig_start(struct drbd_conf *mdev) +static void conn_reconfig_start(struct drbd_tconn *tconn) { - wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); - wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); - drbd_thread_start(&mdev->tconn->worker); - drbd_flush_workqueue(mdev); + wait_event(tconn->ping_wait, !test_and_set_bit(CONFIG_PENDING, &tconn->flags)); + wait_event(tconn->ping_wait, !test_bit(OBJECT_DYING, &tconn->flags)); + drbd_thread_start(&tconn->worker); + conn_flush_workqueue(tconn); } /* if still unconfigured, stops worker again. * if configured now, clears CONFIG_PENDING. 
* wakes potential waiters */ -static void drbd_reconfig_done(struct drbd_conf *mdev) +static void conn_reconfig_done(struct drbd_tconn *tconn) { - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.disk == D_DISKLESS && - mdev->state.conn == C_STANDALONE && - mdev->state.role == R_SECONDARY) { - set_bit(DEVICE_DYING, &mdev->flags); - drbd_thread_stop_nowait(&mdev->tconn->worker); + spin_lock_irq(&tconn->req_lock); + if (conn_all_vols_unconf(tconn)) { + set_bit(OBJECT_DYING, &tconn->flags); + drbd_thread_stop_nowait(&tconn->worker); } else - clear_bit(CONFIG_PENDING, &mdev->flags); - spin_unlock_irq(&mdev->tconn->req_lock); - wake_up(&mdev->state_wait); + clear_bit(CONFIG_PENDING, &tconn->flags); + spin_unlock_irq(&tconn->req_lock); + wake_up(&tconn->ping_wait); } /* Make sure IO is suspended before calling this function(). */ @@ -933,7 +931,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp enum drbd_state_rv rv; int cp_discovered = 0; - drbd_reconfig_start(mdev); + conn_reconfig_start(mdev->tconn); /* if you want to reconfigure, please tear down first */ if (mdev->state.disk > D_DISKLESS) { @@ -1279,7 +1277,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); put_ldev(mdev); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; force_diskless_dec: @@ -1300,7 +1298,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp lc_destroy(resync_lru); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; } @@ -1344,7 +1342,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, void *int_dig_vv = NULL; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; - drbd_reconfig_start(mdev); + conn_reconfig_start(mdev->tconn); if (mdev->state.conn > C_STANDALONE) { retcode = 
ERR_NET_CONFIGURED; @@ -1530,7 +1528,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; fail: @@ -1543,7 +1541,7 @@ fail: kfree(new_conf); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0c3a094f691..66080e20408 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3932,14 +3932,14 @@ static void drbdd(struct drbd_tconn *tconn) } } -void drbd_flush_workqueue(struct drbd_conf *mdev) +void conn_flush_workqueue(struct drbd_tconn *tconn) { struct drbd_wq_barrier barr; barr.w.cb = w_prev_work_done; - barr.w.mdev = mdev; + barr.w.tconn = tconn; init_completion(&barr.done); - drbd_queue_work(&mdev->tconn->data.work, &barr.w); + drbd_queue_work(&tconn->data.work, &barr.w); wait_for_completion(&barr.done); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d3bf8e39fa5..338e1f5c7cd 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -41,13 +41,29 @@ extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns); +static void after_all_state_ch(struct drbd_tconn *tconn); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state 
sanitize_state(struct drbd_conf *mdev, union drbd_state ns, const char **warn_sync_abort); +int conn_all_vols_unconf(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int minor, uncfg = 1; + + idr_for_each_entry(&tconn->volumes, mdev, minor) { + uncfg &= (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY); + if (!uncfg) + break; + } + + return uncfg; +} + /** * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. @@ -744,20 +760,6 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, print_state_change(mdev, os, ns, flags); - /* solve the race between becoming unconfigured, - * worker doing the cleanup, and - * admin reconfiguring us: - * on (re)configure, first set CONFIG_PENDING, - * then wait for a potentially exiting worker, - * start the worker, and schedule one no_op. - * then proceed with configuration. - */ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY && - !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) - set_bit(DEVICE_DYING, &mdev->flags); - /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference * on the ldev here, to be sure the transition -> D_DISKLESS resp. * drbd_ldev_destroy() won't happen before our corresponding @@ -768,6 +770,18 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, mdev->state = ns; + /* solve the race between becoming unconfigured, + * worker doing the cleanup, and + * admin reconfiguring us: + * on (re)configure, first set CONFIG_PENDING, + * then wait for a potentially exiting worker, + * start the worker, and schedule one no_op. + * then proceed with configuration. 
+ */ + if(conn_all_vols_unconf(mdev->tconn) && + !test_and_set_bit(CONFIG_PENDING, &mdev->tconn->flags)) + set_bit(OBJECT_DYING, &mdev->tconn->flags); + if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) drbd_print_uuids(mdev, "attached to UUIDs"); @@ -1236,7 +1250,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); } - after_all_state_ch(mdev->tconn, ns); + after_all_state_ch(mdev->tconn); drbd_md_sync(mdev); } @@ -1248,10 +1262,10 @@ struct after_conn_state_chg_work { enum chg_state_flags flags; }; -static void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns) +static void after_all_state_ch(struct drbd_tconn *tconn) { - if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { - /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */ + if (conn_all_vols_unconf(tconn) && + test_bit(OBJECT_DYING, &tconn->flags)) { drbd_thread_stop_nowait(&tconn->worker); } } @@ -1271,7 +1285,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) drbd_thread_start(&tconn->receiver); //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); - after_all_state_ch(tconn, nms); + after_all_state_ch(tconn); return 1; } diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 5fdbdf0be70..d9536cd798e 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -91,6 +91,7 @@ conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_s enum chg_state_flags flags); extern void drbd_resume_al(struct drbd_conf *mdev); +extern int conn_all_vols_unconf(struct drbd_tconn *tconn); /** * drbd_request_state() - Reqest a state change diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 8ee5c4f3d1c..5cb5ffce097 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1643,29 +1643,13 @@ void drbd_start_resync(struct drbd_conf *mdev, enum 
drbd_conns side) mutex_unlock(mdev->state_mutex); } -static int _worker_dying(int vnr, void *p, void *data) -{ - struct drbd_conf *mdev = (struct drbd_conf *)p; - - D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); - /* _drbd_set_state only uses stop_nowait. - * wait here for the exiting receiver. */ - drbd_thread_stop(&mdev->tconn->receiver); - drbd_mdev_cleanup(mdev); - - clear_bit(DEVICE_DYING, &mdev->flags); - clear_bit(CONFIG_PENDING, &mdev->flags); - wake_up(&mdev->state_wait); - - return 0; -} - int drbd_worker(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; + struct drbd_conf *mdev; LIST_HEAD(work_list); - int intr = 0; + int minor, intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); @@ -1749,7 +1733,16 @@ int drbd_worker(struct drbd_thread *thi) */ spin_unlock_irq(&tconn->data.work.q_lock); - idr_for_each(&tconn->volumes, _worker_dying, NULL); + drbd_thread_stop(&tconn->receiver); + idr_for_each_entry(&tconn->volumes, mdev, minor) { + D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); + /* _drbd_set_state only uses stop_nowait. + * wait here for the exiting receiver. 
*/ + drbd_mdev_cleanup(mdev); + } + clear_bit(OBJECT_DYING, &tconn->flags); + clear_bit(CONFIG_PENDING, &tconn->flags); + wake_up(&tconn->ping_wait); return 0; } From 3f9cbe937ec41fca8842594e0529537f3019c775 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 17 Feb 2011 22:50:23 +0100 Subject: [PATCH 144/609] drbd: Removed the mdev parameter from the ..to_tags() and ...from_tags() functions Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 8cdfb46243e..33159e47e6e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -50,9 +50,9 @@ static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; /* Generate the tag_list to struct functions */ #define NL_PACKET(name, number, fields) \ -static int name ## _from_tags(struct drbd_conf *mdev, \ +static int name ## _from_tags( \ unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ -static int name ## _from_tags(struct drbd_conf *mdev, \ +static int name ## _from_tags( \ unsigned short *tags, struct name *arg) \ { \ int tag; \ @@ -64,7 +64,7 @@ static int name ## _from_tags(struct drbd_conf *mdev, \ fields \ default: \ if (tag & T_MANDATORY) { \ - dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \ + printk(KERN_ERR "drbd: Unknown tag: %d\n", tag_number(tag)); \ return 0; \ } \ } \ @@ -87,7 +87,7 @@ static int name ## _from_tags(struct drbd_conf *mdev, \ #define NL_STRING(pn, pr, member, len) \ case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ if (dlen > len) { \ - dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \ + printk(KERN_ERR "drbd: arg too long: %s (%u wanted, max len: %u bytes)\n", \ #member, dlen, (unsigned int)len); \ return 0; \ } \ @@ -99,10 +99,10 @@ static int name ## _from_tags(struct drbd_conf *mdev, \ /* 
Generate the struct to tag_list functions */ #define NL_PACKET(name, number, fields) \ static unsigned short* \ -name ## _to_tags(struct drbd_conf *mdev, \ +name ## _to_tags( \ struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ static unsigned short* \ -name ## _to_tags(struct drbd_conf *mdev, \ +name ## _to_tags( \ struct name *arg, unsigned short *tags) \ { \ fields \ @@ -483,7 +483,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct primary primary_args; memset(&primary_args, 0, sizeof(struct primary)); - if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) { + if (!primary_from_tags(nlp->tag_list, &primary_args)) { reply->ret_code = ERR_MANDATORY_TAG; return 0; } @@ -956,7 +956,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->dc.fencing = DRBD_FENCING_DEF; nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { + if (!disk_conf_from_tags(nlp->tag_list, &nbc->dc)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1376,7 +1376,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { + if (!net_conf_from_tags(nlp->tag_list, new_conf)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1553,7 +1553,7 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl struct disconnect dc; memset(&dc, 0, sizeof(struct disconnect)); - if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) { + if (!disconnect_from_tags(nlp->tag_list, &dc)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1630,7 +1630,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, enum dds_flags ddsf; memset(&rs, 0, sizeof(struct resize)); - if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { + if 
(!resize_from_tags(nlp->tag_list, &rs)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1715,7 +1715,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } else memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); - if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) { + if (!syncer_conf_from_tags(nlp->tag_list, &sc)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -2020,15 +2020,15 @@ static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl tl = reply->tag_list; if (get_ldev(mdev)) { - tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl); + tl = disk_conf_to_tags(&mdev->ldev->dc, tl); put_ldev(mdev); } if (get_net_conf(mdev->tconn)) { - tl = net_conf_to_tags(mdev, mdev->tconn->net_conf, tl); + tl = net_conf_to_tags(mdev->tconn->net_conf, tl); put_net_conf(mdev->tconn); } - tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); + tl = syncer_conf_to_tags(&mdev->sync_conf, tl); put_unaligned(TT_END, tl++); /* Close the tag list */ @@ -2043,7 +2043,7 @@ static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp unsigned long rs_left; unsigned int res; - tl = get_state_to_tags(mdev, (struct get_state *)&s, tl); + tl = get_state_to_tags((struct get_state *)&s, tl); /* no local ref, no bitmap, no syncer progress. 
*/ if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { @@ -2105,7 +2105,7 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct start_ov args = { .start_sector = mdev->ov_start_sector }; - if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { + if (!start_ov_from_tags(nlp->tag_list, &args)) { reply->ret_code = ERR_MANDATORY_TAG; return 0; } @@ -2131,7 +2131,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl struct new_c_uuid args; memset(&args, 0, sizeof(struct new_c_uuid)); - if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) { + if (!new_c_uuid_from_tags(nlp->tag_list, &args)) { reply->ret_code = ERR_MANDATORY_TAG; return 0; } @@ -2365,7 +2365,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - tl = get_state_to_tags(mdev, (struct get_state *)&state, tl); + tl = get_state_to_tags((struct get_state *)&state, tl); put_unaligned(TT_END, tl++); /* Close the tag list */ From 49559d87fdfe3ab33c684506c394681da6a746c9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 14:19:44 +0100 Subject: [PATCH 145/609] drbd: Improved the dec_*() macros Now those can be used with a struct drbd_conf * that has a name other than 'mdev'.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 51 ++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 145ae57b311..103b61748c2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1988,17 +1988,19 @@ static inline void inc_ap_pending(struct drbd_conf *mdev) atomic_inc(&mdev->ap_pending_cnt); } -#define ERR_IF_CNT_IS_NEGATIVE(which) \ - if (atomic_read(&mdev->which) < 0) \ +#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \ + if (atomic_read(&mdev->which) < 0) \ dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ - __func__ , __LINE__ , \ - atomic_read(&mdev->which)) + func, line, \ + atomic_read(&mdev->which)) -#define dec_ap_pending(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ - wake_up(&mdev->misc_wait); \ - ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) +#define dec_ap_pending(mdev) _dec_ap_pending(mdev, __FUNCTION__, __LINE__) +static inline void _dec_ap_pending(struct drbd_conf *mdev, const char *func, int line) +{ + if (atomic_dec_and_test(&mdev->ap_pending_cnt)) + wake_up(&mdev->misc_wait); + ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line); +} /* counts how many resync-related answers we still expect from the peer * increase decrease @@ -2011,10 +2013,12 @@ static inline void inc_rs_pending(struct drbd_conf *mdev) atomic_inc(&mdev->rs_pending_cnt); } -#define dec_rs_pending(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_dec(&mdev->rs_pending_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) +#define dec_rs_pending(mdev) _dec_rs_pending(mdev, __FUNCTION__, __LINE__) +static inline void _dec_rs_pending(struct drbd_conf *mdev, const char *func, int line) +{ + atomic_dec(&mdev->rs_pending_cnt); + ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line); +} /* counts how many 
answers we still need to send to the peer. * increased on @@ -2030,16 +2034,19 @@ static inline void inc_unacked(struct drbd_conf *mdev) atomic_inc(&mdev->unacked_cnt); } -#define dec_unacked(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_dec(&mdev->unacked_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) - -#define sub_unacked(mdev, n) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_sub(n, &mdev->unacked_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) +#define dec_unacked(mdev) _dec_unacked(mdev, __FUNCTION__, __LINE__) +static inline void _dec_unacked(struct drbd_conf *mdev, const char *func, int line) +{ + atomic_dec(&mdev->unacked_cnt); + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); +} +#define sub_unacked(mdev, n) _sub_unacked(mdev, n, __FUNCTION__, __LINE__) +static inline void _sub_unacked(struct drbd_conf *mdev, int n, const char *func, int line) +{ + atomic_sub(n, &mdev->unacked_cnt); + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); +} static inline void put_net_conf(struct drbd_tconn *tconn) { From 2f5cdd0b2cf7a4099faa7e53ba0a29ddf0ddf950 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 14:29:27 +0100 Subject: [PATCH 146/609] drbd: Converted the transfer log from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +- drivers/block/drbd/drbd_main.c | 125 +++++++++++++++-------------- drivers/block/drbd/drbd_nl.c | 4 +- drivers/block/drbd/drbd_receiver.c | 6 +- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_req.h | 3 +- drivers/block/drbd/drbd_state.c | 5 +- 7 files changed, 79 insertions(+), 72 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 103b61748c2..48367e53a7a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1173,10 +1173,10 @@ extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); #define drbd_calc_cpu_mask(A) ({}) #endif 
extern void drbd_free_resources(struct drbd_conf *mdev); -extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, +extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr, unsigned int set_size); -extern void tl_clear(struct drbd_conf *mdev); -extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); +extern void tl_clear(struct drbd_tconn *); +extern void _tl_add_barrier(struct drbd_tconn *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_tconn *tconn); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f43752fb5b5..cbec5ff2cc7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -180,7 +180,7 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) * Each &struct drbd_tl_epoch has a circular double linked list of requests * attached. */ -static int tl_init(struct drbd_conf *mdev) +static int tl_init(struct drbd_tconn *tconn) { struct drbd_tl_epoch *b; @@ -195,21 +195,23 @@ static int tl_init(struct drbd_conf *mdev) b->n_writes = 0; b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - mdev->tconn->oldest_tle = b; - mdev->tconn->newest_tle = b; - INIT_LIST_HEAD(&mdev->tconn->out_of_sequence_requests); + tconn->oldest_tle = b; + tconn->newest_tle = b; + INIT_LIST_HEAD(&tconn->out_of_sequence_requests); return 1; } -static void tl_cleanup(struct drbd_conf *mdev) +static void tl_cleanup(struct drbd_tconn *tconn) { - D_ASSERT(mdev->tconn->oldest_tle == mdev->tconn->newest_tle); - D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); - kfree(mdev->tconn->oldest_tle); - mdev->tconn->oldest_tle = NULL; - kfree(mdev->tconn->unused_spare_tle); - mdev->tconn->unused_spare_tle = NULL; + if (tconn->oldest_tle != tconn->newest_tle) + conn_err(tconn, "ASSERT FAILED: oldest_tle == 
newest_tle\n"); + if (!list_empty(&tconn->out_of_sequence_requests)) + conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n"); + kfree(tconn->oldest_tle); + tconn->oldest_tle = NULL; + kfree(tconn->unused_spare_tle); + tconn->unused_spare_tle = NULL; } /** @@ -219,7 +221,7 @@ static void tl_cleanup(struct drbd_conf *mdev) * * The caller must hold the req_lock. */ -void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) +void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new) { struct drbd_tl_epoch *newest_before; @@ -229,13 +231,13 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) new->next = NULL; new->n_writes = 0; - newest_before = mdev->tconn->newest_tle; + newest_before = tconn->newest_tle; /* never send a barrier number == 0, because that is special-cased * when using TCQ for our write ordering code */ new->br_number = (newest_before->br_number+1) ?: 1; - if (mdev->tconn->newest_tle != new) { - mdev->tconn->newest_tle->next = new; - mdev->tconn->newest_tle = new; + if (tconn->newest_tle != new) { + tconn->newest_tle->next = new; + tconn->newest_tle = new; } } @@ -249,31 +251,32 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) * &struct drbd_tl_epoch objects this function will cause a termination * of the connection. */ -void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, - unsigned int set_size) +void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, + unsigned int set_size) { + struct drbd_conf *mdev; struct drbd_tl_epoch *b, *nob; /* next old barrier */ struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); - b = mdev->tconn->oldest_tle; + b = tconn->oldest_tle; /* first some paranoia code */ if (b == NULL) { - dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", - barrier_nr); + conn_err(tconn, "BAD! 
BarrierAck #%u received, but no epoch in tl!?\n", + barrier_nr); goto bail; } if (b->br_number != barrier_nr) { - dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n", - barrier_nr, b->br_number); + conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n", + barrier_nr, b->br_number); goto bail; } if (b->n_writes != set_size) { - dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", - barrier_nr, set_size, b->n_writes); + conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", + barrier_nr, set_size, b->n_writes); goto bail; } @@ -296,28 +299,29 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, _req_mod(, BARRIER_ACKED) above. */ list_del_init(&b->requests); + mdev = b->w.mdev; nob = b->next; if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, b); + _tl_add_barrier(tconn, b); if (nob) - mdev->tconn->oldest_tle = nob; + tconn->oldest_tle = nob; /* if nob == NULL b was the only barrier, and becomes the new - barrier. Therefore mdev->tconn->oldest_tle points already to b */ + barrier. Therefore tconn->oldest_tle points already to b */ } else { D_ASSERT(nob != NULL); - mdev->tconn->oldest_tle = nob; + tconn->oldest_tle = nob; kfree(b); } - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); dec_ap_pending(mdev); return; bail: - spin_unlock_irq(&mdev->tconn->req_lock); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + spin_unlock_irq(&tconn->req_lock); + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } @@ -329,15 +333,15 @@ bail: * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, * RESTART_FROZEN_DISK_IO. 
*/ -void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) +void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) { struct drbd_tl_epoch *b, *tmp, **pn; struct list_head *le, *tle, carry_reads; struct drbd_request *req; int rv, n_writes, n_reads; - b = mdev->tconn->oldest_tle; - pn = &mdev->tconn->oldest_tle; + b = tconn->oldest_tle; + pn = &tconn->oldest_tle; while (b) { n_writes = 0; n_reads = 0; @@ -356,11 +360,11 @@ void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) b->n_writes = n_writes; if (b->w.cb == NULL) { b->w.cb = w_send_barrier; - inc_ap_pending(mdev); - set_bit(CREATE_BARRIER, &mdev->flags); + inc_ap_pending(b->w.mdev); + set_bit(CREATE_BARRIER, &b->w.mdev->flags); } - drbd_queue_work(&mdev->tconn->data.work, &b->w); + drbd_queue_work(&tconn->data.work, &b->w); } pn = &b->next; } else { @@ -374,11 +378,12 @@ void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) * the newest barrier may not have been queued yet, * in which case w.cb is still NULL. */ if (b->w.cb != NULL) - dec_ap_pending(mdev); + dec_ap_pending(b->w.mdev); - if (b == mdev->tconn->newest_tle) { + if (b == tconn->newest_tle) { /* recycle, but reinit! */ - D_ASSERT(tmp == NULL); + if (tmp != NULL) + conn_err(tconn, "ASSERT FAILED tmp == NULL"); INIT_LIST_HEAD(&b->requests); list_splice(&carry_reads, &b->requests); INIT_LIST_HEAD(&b->w.list); @@ -406,20 +411,23 @@ void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) * by the requests on the transfer gets marked as our of sync. Called from the * receiver thread and the worker thread. */ -void tl_clear(struct drbd_conf *mdev) +void tl_clear(struct drbd_tconn *tconn) { + struct drbd_conf *mdev; struct list_head *le, *tle; struct drbd_request *r; + int minor; - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); - _tl_restart(mdev, CONNECTION_LOST_WHILE_PENDING); + _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); /* we expect this list to be empty. 
*/ - D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); + if (!list_empty(&tconn->out_of_sequence_requests)) + conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n"); /* but just in case, clean it up anyways! */ - list_for_each_safe(le, tle, &mdev->tconn->out_of_sequence_requests) { + list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) { r = list_entry(le, struct drbd_request, tl_requests); /* It would be nice to complete outside of spinlock. * But this is easier for now. */ @@ -427,16 +435,17 @@ void tl_clear(struct drbd_conf *mdev) } /* ensure bit indicating barrier is required is clear */ - clear_bit(CREATE_BARRIER, &mdev->flags); + idr_for_each_entry(&tconn->volumes, mdev, minor) + clear_bit(CREATE_BARRIER, &mdev->flags); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); } -void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) +void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) { - spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev, what); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, what); + spin_unlock_irq(&tconn->req_lock); } static int drbd_thread_setup(void *arg) @@ -2199,6 +2208,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + if (!tl_init(tconn)) + goto fail; + tconn->cstate = C_STANDALONE; mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); @@ -2224,6 +2236,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) return tconn; fail: + tl_cleanup(tconn); kfree(tconn->name); kfree(tconn); @@ -2316,9 +2329,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) if (drbd_bm_init(mdev)) goto out_no_bitmap; - /* no need to lock access, we are still initializing this minor device. 
*/ - if (!tl_init(mdev)) - goto out_no_tl; mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; @@ -2334,8 +2344,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* out_whatever_else: kfree(mdev->current_epoch); */ out_no_epoch: - tl_cleanup(mdev); -out_no_tl: drbd_bm_cleanup(mdev); out_no_bitmap: __free_page(mdev->md_io_page); @@ -2357,7 +2365,6 @@ out_no_tconn: void drbd_free_mdev(struct drbd_conf *mdev) { kfree(mdev->current_epoch); - tl_cleanup(mdev); if (mdev->bitmap) /* should no longer be there. */ drbd_bm_cleanup(mdev); __free_page(mdev->md_io_page); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 33159e47e6e..b141f891f64 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1996,9 +1996,9 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); if (reply->ret_code == SS_SUCCESS) { if (mdev->state.conn < C_CONNECTED) - tl_clear(mdev); + tl_clear(mdev->tconn); if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) - tl_restart(mdev, FAIL_FROZEN_DISK_IO); + tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO); } drbd_resume_io(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 66080e20408..fcdc2c1cc50 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3466,7 +3466,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, for temporal network outages! 
*/ spin_unlock_irq(&mdev->tconn->req_lock); dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); - tl_clear(mdev); + tl_clear(mdev->tconn); drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); @@ -4025,7 +4025,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) mdev->p_uuid = NULL; if (!is_susp(mdev->state)) - tl_clear(mdev); + tl_clear(mdev->tconn); drbd_md_sync(mdev); @@ -4585,7 +4585,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack; - tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); + tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size)); if (mdev->state.conn == C_AHEAD && atomic_read(&mdev->ap_in_flight) == 0 && diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index cfa5fba5303..fa799e372ba 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -885,7 +885,7 @@ allocate_barrier: * barrier packet, this request is queued within the same spinlock. 
*/ if ((remote || send_oos) && mdev->tconn->unused_spare_tle && test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, mdev->tconn->unused_spare_tle); + _tl_add_barrier(mdev->tconn, mdev->tconn->unused_spare_tle); mdev->tconn->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 0b3cd412d52..8c8c2588c4b 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -254,7 +254,8 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, extern void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m); extern void request_timer_fn(unsigned long data); -extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); +extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); +extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. 
*/ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 338e1f5c7cd..ffee90d6d37 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,7 +37,6 @@ struct after_state_chg_work { struct completion *done; }; -extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); @@ -1009,7 +1008,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (ns.susp_fen) { /* case1: The outdate peer handler is successful: */ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - tl_clear(mdev); + tl_clear(mdev->tconn); if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); @@ -1028,7 +1027,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (what != NOTHING) { spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev, what); + _tl_restart(mdev->tconn, what); nsm.i &= mdev->state.i; _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); spin_unlock_irq(&mdev->tconn->req_lock); From 1aba4d7fcfabe999e0c99683b394aa76d5c42842 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 15:38:08 +0100 Subject: [PATCH 147/609] drbd: Preparing the connector interface to operate on connections Up to now it only operated on minor numbers. Now it can also work on named connections. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 15 ++++++ drivers/block/drbd/drbd_nl.c | 96 ++++++++++++++++++++++------------ include/linux/drbd.h | 19 +++++-- 4 files changed, 94 insertions(+), 37 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 48367e53a7a..033af199586 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1479,6 +1479,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); +struct drbd_tconn *conn_by_name(const char *name); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cbec5ff2cc7..4761426f9ad 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2196,6 +2196,21 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq) INIT_LIST_HEAD(&wq->q); } +struct drbd_tconn *conn_by_name(const char *name) +{ + struct drbd_tconn *tconn; + + write_lock_irq(&global_state_lock); + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (!strcmp(tconn->name, name)) + goto found; + } + tconn = NULL; +found: + write_unlock_irq(&global_state_lock); + return tconn; +} + struct drbd_tconn *drbd_new_tconn(char *name) { struct drbd_tconn *tconn; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b141f891f64..27a43d138f6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2184,42 +2184,57 @@ out: return 0; } +enum cn_handler_type { + CHT_MINOR, + CHT_CONN, + CHT_CTOR, + /* CHT_RES, later */ +}; + struct cn_handler_struct { - int (*function)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); + enum cn_handler_type type; + union { + int (*minor_based)(struct drbd_conf *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + 
int (*conn_based)(struct drbd_tconn *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*constructor)(struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + }; int reply_body_size; }; static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { &drbd_nl_primary, 0 }, - [ P_secondary ] = { &drbd_nl_secondary, 0 }, - [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 }, - [ P_detach ] = { &drbd_nl_detach, 0 }, - [ P_net_conf ] = { &drbd_nl_net_conf, 0 }, - [ P_disconnect ] = { &drbd_nl_disconnect, 0 }, - [ P_resize ] = { &drbd_nl_resize, 0 }, - [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 }, - [ P_invalidate ] = { &drbd_nl_invalidate, 0 }, - [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 }, - [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 }, - [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 }, - [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 }, - [ P_resume_io ] = { &drbd_nl_resume_io, 0 }, - [ P_outdate ] = { &drbd_nl_outdate, 0 }, - [ P_get_config ] = { &drbd_nl_get_config, + [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, + [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, + [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, + [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, + [ P_net_conf ] = { CHT_MINOR, { &drbd_nl_net_conf }, 0 }, + [ P_disconnect ] = { CHT_MINOR, { &drbd_nl_disconnect }, 0 }, + [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, + [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, + [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, + [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, + [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, + [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, + [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, + [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, + [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, + [ P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, 
sizeof(struct syncer_conf_tag_len_struct) + sizeof(struct disk_conf_tag_len_struct) + sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { &drbd_nl_get_state, + [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, sizeof(struct get_state_tag_len_struct) + sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { &drbd_nl_get_uuids, + [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, sizeof(struct get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag, + [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { &drbd_nl_start_ov, 0 }, - [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, + [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, + [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) @@ -2229,6 +2244,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; struct drbd_conf *mdev; + struct drbd_tconn *tconn; int retcode, rr; int reply_size = sizeof(struct cn_msg) + sizeof(struct drbd_nl_cfg_reply) @@ -2244,13 +2260,6 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms goto fail; } - mdev = ensure_mdev(nlp->drbd_minor, - (nlp->flags & DRBD_NL_CREATE_DEVICE)); - if (!mdev) { - retcode = ERR_MINOR_INVALID; - goto fail; - } - if (nlp->packet_type >= P_nl_after_last_packet || nlp->packet_type == P_return_code_only) { retcode = ERR_PACKET_NR; @@ -2260,7 +2269,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms cm = cnd_table + nlp->packet_type; /* This may happen if packet number is 0: */ - if (cm->function == NULL) { + if (cm->minor_based == NULL) { retcode = ERR_PACKET_NR; goto fail; } @@ -2281,7 +2290,28 @@ static void drbd_connector_callback(struct cn_msg *req, struct 
netlink_skb_parms reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ /* reply->tag_list; might be modified by cm->function. */ - rr = cm->function(mdev, nlp, reply); + retcode = ERR_MINOR_INVALID; + rr = 0; + switch (cm->type) { + case CHT_MINOR: + mdev = minor_to_mdev(nlp->drbd_minor); + if (!mdev) + goto fail; + rr = cm->minor_based(mdev, nlp, reply); + break; + case CHT_CONN: + tconn = conn_by_name(nlp->obj_name); + if (!tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto fail; + } + rr = cm->conn_based(tconn, nlp, reply); + break; + case CHT_CTOR: + rr = cm->constructor(nlp, reply); + break; + /* case CHT_RES: */ + } cn_reply->id = req->id; cn_reply->seq = req->seq; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 70a688b92c1..7683b4ab658 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,6 +155,7 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, + ERR_CONN_NOT_KNOWN = 158, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -347,8 +348,11 @@ enum drbd_timeout_flag { /* Start of the new netlink/connector stuff */ -#define DRBD_NL_CREATE_DEVICE 0x01 -#define DRBD_NL_SET_DEFAULTS 0x02 +enum drbd_ncr_flags { + DRBD_NL_CREATE_DEVICE = 0x01, + DRBD_NL_SET_DEFAULTS = 0x02, +}; +#define DRBD_NL_OBJ_NAME_LEN 32 /* For searching a vacant cn_idx value */ @@ -356,8 +360,15 @@ enum drbd_timeout_flag { struct drbd_nl_cfg_req { int packet_type; - unsigned int drbd_minor; - int flags; + union { + struct { + unsigned int drbd_minor; + enum drbd_ncr_flags flags; + }; + struct { + char obj_name[DRBD_NL_OBJ_NAME_LEN]; + }; + }; unsigned short tag_list[]; }; From 80883197da071239ed9e76bd3b9d8c9c5e19e4e6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 18 Feb 2011 14:56:45 +0100 Subject: [PATCH 148/609] drbd: Converted drbd_nl_(net_conf|disconnect)() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 + 
drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 120 ++++++++++++++++----------------- 3 files changed, 63 insertions(+), 61 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 033af199586..a27e2a4e038 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -170,6 +170,7 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { extern struct drbd_conf **minor_table; extern struct ratelimit_state drbd_ratelimit_state; +extern struct list_head drbd_tconns; /* on the wire */ enum drbd_packet { @@ -1474,6 +1475,7 @@ extern wait_queue_head_t drbd_pp_wait; extern rwlock_t global_state_lock; +extern int conn_lowest_minor(struct drbd_tconn *tconn); extern struct drbd_conf *drbd_new_device(unsigned int minor); extern void drbd_free_mdev(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4761426f9ad..2bfd63058f4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -615,7 +615,7 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas } #ifdef CONFIG_SMP -static int conn_lowest_minor(struct drbd_tconn *tconn) +int conn_lowest_minor(struct drbd_tconn *tconn) { int minor = 0; idr_get_next(&tconn->volumes, &minor); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 27a43d138f6..455a51dd364 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1326,7 +1326,7 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, return 0; } -static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int i; @@ -1335,16 +1335,17 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct crypto_hash *tfm = NULL; struct crypto_hash 
*integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - struct drbd_conf *odev; + struct drbd_conf *mdev; char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; + struct drbd_tconn *oconn; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; - conn_reconfig_start(mdev->tconn); + conn_reconfig_start(tconn); - if (mdev->state.conn > C_STANDALONE) { + if (tconn->cstate > C_STANDALONE) { retcode = ERR_NET_CONFIGURED; goto fail; } @@ -1387,13 +1388,25 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (get_ldev(mdev)) { - enum drbd_fencing_p fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { - retcode = ERR_STONITH_AND_PROT_A; + idr_for_each_entry(&tconn->volumes, mdev, i) { + if (get_ldev(mdev)) { + enum drbd_fencing_p fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { + retcode = ERR_STONITH_AND_PROT_A; + goto fail; + } + } + if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { + retcode = ERR_DISCARD; goto fail; } + if (!mdev->bitmap) { + if(drbd_bm_init(mdev)) { + retcode = ERR_NOMEM; + goto fail; + } + } } if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { @@ -1401,31 +1414,25 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { - retcode = ERR_DISCARD; - goto fail; - } - retcode = NO_ERROR; new_my_addr = (struct sockaddr *)&new_conf->my_addr; new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev || odev == mdev) + list_for_each_entry(oconn, &drbd_tconns, all_tconn) { + if (oconn == tconn) continue; - if (get_net_conf(odev->tconn)) { - taken_addr = (struct sockaddr 
*)&odev->tconn->net_conf->my_addr; - if (new_conf->my_addr_len == odev->tconn->net_conf->my_addr_len && + if (get_net_conf(oconn)) { + taken_addr = (struct sockaddr *)&oconn->net_conf->my_addr; + if (new_conf->my_addr_len == oconn->net_conf->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) retcode = ERR_LOCAL_ADDR; - taken_addr = (struct sockaddr *)&odev->tconn->net_conf->peer_addr; - if (new_conf->peer_addr_len == odev->tconn->net_conf->peer_addr_len && + taken_addr = (struct sockaddr *)&oconn->net_conf->peer_addr; + if (new_conf->peer_addr_len == oconn->net_conf->peer_addr_len && !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = ERR_PEER_ADDR; - put_net_conf(odev->tconn); + put_net_conf(oconn); if (retcode != NO_ERROR) goto fail; } @@ -1470,6 +1477,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; + /* allocation not in the IO path, cqueue thread context */ if (integrity_w_tfm) { i = crypto_hash_digestsize(integrity_w_tfm); int_dig_out = kmalloc(i, GFP_KERNEL); @@ -1489,46 +1497,40 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - if (!mdev->bitmap) { - if(drbd_bm_init(mdev)) { - retcode = ERR_NOMEM; - goto fail; - } - } - - drbd_flush_workqueue(mdev); - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->tconn->net_conf != NULL) { + conn_flush_workqueue(tconn); + spin_lock_irq(&tconn->req_lock); + if (tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); goto fail; } - mdev->tconn->net_conf = new_conf; + tconn->net_conf = new_conf; - mdev->send_cnt = 0; - mdev->recv_cnt = 0; + crypto_free_hash(tconn->cram_hmac_tfm); + tconn->cram_hmac_tfm = tfm; - crypto_free_hash(mdev->tconn->cram_hmac_tfm); - mdev->tconn->cram_hmac_tfm = tfm; + crypto_free_hash(tconn->integrity_w_tfm); + tconn->integrity_w_tfm = 
integrity_w_tfm; - crypto_free_hash(mdev->tconn->integrity_w_tfm); - mdev->tconn->integrity_w_tfm = integrity_w_tfm; + crypto_free_hash(tconn->integrity_r_tfm); + tconn->integrity_r_tfm = integrity_r_tfm; - crypto_free_hash(mdev->tconn->integrity_r_tfm); - mdev->tconn->integrity_r_tfm = integrity_r_tfm; + kfree(tconn->int_dig_out); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + tconn->int_dig_out=int_dig_out; + tconn->int_dig_in=int_dig_in; + tconn->int_dig_vv=int_dig_vv; + retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); - kfree(mdev->tconn->int_dig_out); - kfree(mdev->tconn->int_dig_in); - kfree(mdev->tconn->int_dig_vv); - mdev->tconn->int_dig_out=int_dig_out; - mdev->tconn->int_dig_in=int_dig_in; - mdev->tconn->int_dig_vv=int_dig_vv; - retcode = _conn_request_state(mdev->tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); - spin_unlock_irq(&mdev->tconn->req_lock); - - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + idr_for_each_entry(&tconn->volumes, mdev, i) { + mdev->send_cnt = 0; + mdev->recv_cnt = 0; + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + } reply->ret_code = retcode; - conn_reconfig_done(mdev->tconn); + conn_reconfig_done(tconn); return 0; fail: @@ -1541,14 +1543,13 @@ fail: kfree(new_conf); reply->ret_code = retcode; - conn_reconfig_done(mdev->tconn); + conn_reconfig_done(tconn); return 0; } -static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - struct drbd_tconn *tconn = mdev->tconn; int retcode; struct disconnect dc; @@ -1600,7 +1601,6 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl done: retcode = NO_ERROR; fail: - drbd_md_sync(mdev); reply->ret_code = retcode; return 0; } @@ -2211,8 +2211,8 @@ static struct cn_handler_struct cnd_table[] = { [ P_secondary ] = { 
CHT_MINOR, { &drbd_nl_secondary }, 0 }, [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, - [ P_net_conf ] = { CHT_MINOR, { &drbd_nl_net_conf }, 0 }, - [ P_disconnect ] = { CHT_MINOR, { &drbd_nl_disconnect }, 0 }, + [ P_net_conf ] = { CHT_CONN, { .conn_based = &drbd_nl_net_conf }, 0 }, + [ P_disconnect ] = { CHT_CONN, { .conn_based = &drbd_nl_disconnect }, 0 }, [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, From 774b305518a68a50df4f479bcf79da2add724e6e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Feb 2011 02:07:03 -0500 Subject: [PATCH 149/609] drbd: Implemented new commands to create/delete connections/minors Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 66 +++++++++++--------- drivers/block/drbd/drbd_nl.c | 106 ++++++++++++++++++++------------- include/linux/drbd.h | 3 + include/linux/drbd_nl.h | 12 ++++ 5 files changed, 119 insertions(+), 72 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a27e2a4e038..535d503886d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1258,7 +1258,6 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); - /* Meta data layout We reserve a 128MB Block (4k aligned) * either at the end of the backing device @@ -1476,8 +1475,9 @@ extern wait_queue_head_t drbd_pp_wait; extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); -extern struct drbd_conf *drbd_new_device(unsigned int minor); +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_free_mdev(struct drbd_conf 
*mdev); +extern void drbd_delete_device(unsigned int minor); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2bfd63058f4..ec7d0d98657 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -614,13 +614,16 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas return thi ? thi->name : task->comm; } -#ifdef CONFIG_SMP int conn_lowest_minor(struct drbd_tconn *tconn) { int minor = 0; - idr_get_next(&tconn->volumes, &minor); + + if (!idr_get_next(&tconn->volumes, &minor)) + return -1; return minor; } + +#ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * @mdev: DRBD device. @@ -2078,15 +2081,16 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev) dev_err(DEV, "%d EEs in net list found!\n", rr); } -/* caution. no locking. - * currently only used from module cleanup code. */ -static void drbd_delete_device(unsigned int minor) +/* caution. no locking. 
*/ +void drbd_delete_device(unsigned int minor) { struct drbd_conf *mdev = minor_to_mdev(minor); if (!mdev) return; + idr_remove(&mdev->tconn->volumes, minor); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2101,7 +2105,6 @@ static void drbd_delete_device(unsigned int minor) bdput(mdev->this_bdev); drbd_free_resources(mdev); - drbd_free_tconn(mdev->tconn); drbd_release_ee_lists(mdev); @@ -2223,6 +2226,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) + goto fail; + if (!tl_init(tconn)) goto fail; @@ -2252,6 +2258,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) fail: tl_cleanup(tconn); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn); @@ -2265,6 +2272,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_unlock_irq(&global_state_lock); idr_destroy(&tconn->volumes); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); @@ -2272,32 +2280,31 @@ void drbd_free_tconn(struct drbd_tconn *tconn) kfree(tconn); } -struct drbd_conf *drbd_new_device(unsigned int minor) +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr) { struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; - char conn_name[9]; /* drbd1234N */ - int vnr; + int vnr_got = vnr; + + mdev = minor_to_mdev(minor); + if (mdev) + return ERR_MINOR_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) - return NULL; - sprintf(conn_name, "drbd%d", minor); - mdev->tconn = drbd_new_tconn(conn_name); - if (!mdev->tconn) - goto out_no_tconn; - if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL)) - goto out_no_cpumask; - if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr)) - goto out_no_cpumask; - if (vnr != 0) { - dev_err(DEV, "vnr = %d\n", vnr); - goto out_no_cpumask; + 
return ERR_NOMEM; + + mdev->tconn = tconn; + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_no_idr; + if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) + goto out_no_idr; + if (vnr_got != vnr) { + dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); + goto out_no_q; } - if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) - goto out_no_cpumask; mdev->minor = minor; @@ -2354,7 +2361,10 @@ struct drbd_conf *drbd_new_device(unsigned int minor) INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - return mdev; + minor_table[minor] = mdev; + add_disk(disk); + + return NO_ERROR; /* out_whatever_else: kfree(mdev->current_epoch); */ @@ -2367,12 +2377,10 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->tconn->cpu_mask); -out_no_cpumask: - drbd_free_tconn(mdev->tconn); -out_no_tconn: + idr_remove(&tconn->volumes, vnr_got); +out_no_idr: kfree(mdev); - return NULL; + return ERR_NOMEM; } /* counterpart of drbd_new_device. diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 455a51dd364..f2739fd188a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -443,40 +443,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) return rv; } -static struct drbd_conf *ensure_mdev(int minor, int create) -{ - struct drbd_conf *mdev; - - if (minor >= minor_count) - return NULL; - - mdev = minor_to_mdev(minor); - - if (!mdev && create) { - struct gendisk *disk = NULL; - mdev = drbd_new_device(minor); - - spin_lock_irq(&drbd_pp_lock); - if (minor_table[minor] == NULL) { - minor_table[minor] = mdev; - disk = mdev->vdisk; - mdev = NULL; - } /* else: we lost the race */ - spin_unlock_irq(&drbd_pp_lock); - - if (disk) /* we won the race above */ - /* in case we ever add a drbd_delete_device(), - * don't forget the del_gendisk! 
*/ - add_disk(disk); - else /* we lost the race above */ - drbd_free_mdev(mdev); - - mdev = minor_to_mdev(minor); - } - - return mdev; -} - static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { @@ -1789,12 +1755,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) sc.al_extents = DRBD_AL_EXTENTS_MAX; - /* to avoid spurious errors when configuring minors before configuring - * the minors they depend on: if necessary, first create the minor we - * depend on */ - if (sc.after >= 0) - ensure_mdev(sc.after, 1); - /* most sanity checks done, try to assign the new sync-after * dependency. need to hold the global lock in there, * to avoid a race in the dependency loop check. */ @@ -2184,13 +2144,73 @@ out: return 0; } +static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_connection args; + + if (!new_connection_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = NO_ERROR; + if (!drbd_new_tconn(args.name)) + reply->ret_code = ERR_NOMEM; + + return 0; +} + +static int drbd_nl_new_minor(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_minor args; + + args.vol_nr = 0; + args.minor = 0; + + if (!new_minor_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); + + return 0; +} + +static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_MINOR_CONFIGURED; + } + return 0; +} + +static int 
drbd_nl_del_conn(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + if (conn_lowest_minor(tconn) < 0) { + drbd_free_tconn(tconn); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_CONN_IN_USE; + } + + return 0; +} + enum cn_handler_type { CHT_MINOR, CHT_CONN, CHT_CTOR, /* CHT_RES, later */ }; - struct cn_handler_struct { enum cn_handler_type type; union { @@ -2235,6 +2255,10 @@ static struct cn_handler_struct cnd_table[] = { sizeof(struct get_timeout_flag_tag_len_struct)}, [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, + [ P_new_connection ] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, + [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, + [ P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, + [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7683b4ab658..e192167e614 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -156,6 +156,9 @@ enum drbd_ret_code { ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, ERR_CONN_NOT_KNOWN = 158, + ERR_CONN_IN_USE = 159, + ERR_MINOR_CONFIGURED = 160, + ERR_MINOR_EXISTS = 161, /* insert new ones above this line */ AFTER_LAST_ERR_CODE diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ab6159e4fcf..1216c7a432c 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -152,6 +152,18 @@ NL_PACKET(new_c_uuid, 26, NL_RESPONSE(return_code_only, 27) #endif +NL_PACKET(new_connection, 28, /* CHT_CTOR */ + NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) +) + +NL_PACKET(new_minor, 29, /* CHT_CONN */ + NL_INTEGER( 86, T_MANDATORY, minor) + NL_INTEGER( 87, T_MANDATORY, vol_nr) +) + +NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ +NL_PACKET(del_connection, 
31, ) /* CHT_CONN */ + #undef NL_PACKET #undef NL_INTEGER #undef NL_INT64 From 81a5d60ecfe1d94627abb54810445f0fd5892f42 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Feb 2011 19:53:16 -0500 Subject: [PATCH 150/609] drbd: Replaced the minor_table array by an idr Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++---- drivers/block/drbd/drbd_main.c | 42 ++++++++++++++++-------------- drivers/block/drbd/drbd_proc.c | 14 +++------- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 10 ++----- 5 files changed, 31 insertions(+), 45 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 535d503886d..783526ab7b2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -168,8 +168,8 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { /* 4th incarnation of the disk layout. */ #define DRBD_MD_MAGIC (DRBD_MAGIC+4) -extern struct drbd_conf **minor_table; extern struct ratelimit_state drbd_ratelimit_state; +extern struct idr minors; extern struct list_head drbd_tconns; /* on the wire */ @@ -1109,11 +1109,7 @@ struct drbd_conf { static inline struct drbd_conf *minor_to_mdev(unsigned int minor) { - struct drbd_conf *mdev; - - mdev = minor < minor_count ? 
minor_table[minor] : NULL; - - return mdev; + return (struct drbd_conf *)idr_find(&minors, minor); } static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ec7d0d98657..6e190c0c9f6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -74,7 +74,7 @@ MODULE_AUTHOR("Philipp Reisner , " MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); MODULE_VERSION(REL_VERSION); MODULE_LICENSE("GPL"); -MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (" +MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices (" __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")"); MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); @@ -120,7 +120,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 /* in 2.6.x, our device mapping and config info contains our virtual gendisks * as member "struct gendisk *vdisk;" */ -struct drbd_conf **minor_table; +struct idr minors; struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache *drbd_request_cache; @@ -2118,11 +2118,13 @@ void drbd_delete_device(unsigned int minor) * allocated from drbd_new_device * and actually free the mdev itself */ drbd_free_mdev(mdev); + idr_remove(&minors, minor); } static void drbd_cleanup(void) { unsigned int i; + struct drbd_conf *mdev; unregister_reboot_notifier(&drbd_notifier); @@ -2139,17 +2141,13 @@ static void drbd_cleanup(void) drbd_nl_cleanup(); - if (minor_table) { - i = minor_count; - while (i--) - drbd_delete_device(i); - drbd_destroy_mempools(); - } - - kfree(minor_table); - + idr_for_each_entry(&minors, mdev, i) + drbd_delete_device(i); + drbd_destroy_mempools(); unregister_blkdev(DRBD_MAJOR, "drbd"); + idr_destroy(&minors); + printk(KERN_INFO "drbd: module cleanup done.\n"); } @@ -2286,6 +2284,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int 
minor, struct gendisk *disk; struct request_queue *q; int vnr_got = vnr; + int minor_got = minor; mdev = minor_to_mdev(minor); if (mdev) @@ -2361,13 +2360,20 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - minor_table[minor] = mdev; + if (!idr_pre_get(&minors, GFP_KERNEL)) + goto out_no_minor_idr; + if (idr_get_new(&minors, mdev, &minor_got)) + goto out_no_minor_idr; + if (minor_got != minor) { + idr_remove(&minors, minor_got); + goto out_no_minor_idr; + } add_disk(disk); return NO_ERROR; -/* out_whatever_else: - kfree(mdev->current_epoch); */ +out_no_minor_idr: + kfree(mdev->current_epoch); out_no_epoch: drbd_bm_cleanup(mdev); out_no_bitmap: @@ -2406,7 +2412,7 @@ int __init drbd_init(void) if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR - "drbd: invalid minor_count (%d)\n", minor_count); + "drbd: invalid minor_count (%d)\n", minor_count); #ifdef MODULE return -EINVAL; #else @@ -2436,10 +2442,7 @@ int __init drbd_init(void) init_waitqueue_head(&drbd_pp_wait); drbd_proc = NULL; /* play safe for drbd_cleanup */ - minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count, - GFP_KERNEL); - if (!minor_table) - goto Enomem; + idr_init(&minors); err = drbd_create_mempools(); if (err) @@ -2460,7 +2463,6 @@ int __init drbd_init(void) printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); printk(KERN_INFO "drbd: registered as block device major %d\n", DRBD_MAJOR); - printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table); return 0; /* Success! 
*/ diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 4e53cb3d99e..36c9a6cecdc 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -194,7 +194,7 @@ static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) static int drbd_seq_show(struct seq_file *seq, void *v) { - int i, hole = 0; + int i, prev_i = -1; const char *sn; struct drbd_conf *mdev; @@ -227,16 +227,10 @@ static int drbd_seq_show(struct seq_file *seq, void *v) oos .. known out-of-sync kB */ - for (i = 0; i < minor_count; i++) { - mdev = minor_to_mdev(i); - if (!mdev) { - hole = 1; - continue; - } - if (hole) { - hole = 0; + idr_for_each_entry(&minors, mdev, i) { + if (prev_i != i - 1) seq_printf(seq, "\n"); - } + prev_i = i; sn = drbd_conn_str(mdev->state.conn); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fcdc2c1cc50..e44bf3c2571 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -284,7 +284,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) atomic_t *a = is_net ? 
&mdev->pp_in_use_by_net : &mdev->pp_in_use; int i; - if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count) + if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count) i = page_chain_free(page); else { struct page *tmp; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5cb5ffce097..e459cb2076b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1349,10 +1349,7 @@ static int _drbd_pause_after(struct drbd_conf *mdev) struct drbd_conf *odev; int i, rv = 0; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev) - continue; + idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (!_drbd_may_sync_now(odev)) @@ -1374,10 +1371,7 @@ static int _drbd_resume_next(struct drbd_conf *mdev) struct drbd_conf *odev; int i, rv = 0; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev) - continue; + idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (odev->state.aftr_isp) { From 2c4a48d097e511e325b63c9caca3a9b94fe03be4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 17:18:24 +0100 Subject: [PATCH 151/609] drbd: remove unused define Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6e190c0c9f6..12c9a704ea0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -149,8 +149,6 @@ static const struct block_device_operations drbd_ops = { .release = drbd_release, }; -#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) - #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. 
From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:51:43 +0100 Subject: [PATCH 152/609] drbd: default to detach on-io-error Old default behaviour was "pass-on", which is not useful in production at all. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 75f05af3372..22920a8af4e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,7 +125,7 @@ #define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT From 3c13b680ce210313c6f7ad163435b62979958c09 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 16:10:01 +0100 Subject: [PATCH 153/609] drbd: only wakeup if something changed in update_peer_seq This commit got it wrong: drbd: Make the peer_seq updating code more obvious Make it more clear that update_peer_seq() is supposed to wake up the seq_wait queue whenever the sequence number changes. We don't need to wake up every time we receive a sequence number that is _different_ from our currently stored "newest" sequence number, but only if we receive a sequence number _newer_ than what we already have, when we actually change mdev->peer_seq.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e44bf3c2571..cd78ebfefe5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1734,14 +1734,15 @@ static bool need_peer_seq(struct drbd_conf *mdev) static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) { - unsigned int old_peer_seq; + unsigned int newest_peer_seq; if (need_peer_seq(mdev)) { spin_lock(&mdev->peer_seq_lock); - old_peer_seq = mdev->peer_seq; - mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + newest_peer_seq = seq_max(mdev->peer_seq, peer_seq); + mdev->peer_seq = newest_peer_seq; spin_unlock(&mdev->peer_seq_lock); - if (old_peer_seq != peer_seq) + /* wake up only if we actually changed mdev->peer_seq */ + if (peer_seq == newest_peer_seq) wake_up(&mdev->seq_wait); } } From 35abf5942427f5062e4aae90dab9edb9dda8d200 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:39:46 +0100 Subject: [PATCH 154/609] drbd: add page pool to be used for meta data IO Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 23 ++++++++++++++++++++++- drivers/block/drbd/drbd_main.c | 9 +++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 783526ab7b2..2444a168347 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1463,11 +1463,32 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t *drbd_request_mempool; extern mempool_t *drbd_ee_mempool; -extern struct page *drbd_pp_pool; /* drbd's page pool */ +/* drbd's page pool, used to buffer data received from the peer, + * or data requested by the peer. + * + * This does not have an emergency reserve. 
+ * + * When allocating from this pool, it first takes pages from the pool. + * Only if the pool is depleted will try to allocate from the system. + * + * The assumption is that pages taken from this pool will be processed, + * and given back, "quickly", and then can be recycled, so we can avoid + * frequent calls to alloc_page(), and still will be able to make progress even + * under memory pressure. + */ +extern struct page *drbd_pp_pool; extern spinlock_t drbd_pp_lock; extern int drbd_pp_vacant; extern wait_queue_head_t drbd_pp_wait; +/* We also need a standard (emergency-reserve backed) page pool + * for meta data IO (activity log, bitmap). + * We can keep it global, as long as it is used as "N pages at a time". + * 128 should be plenty, currently we probably can get away with as few as 1. + */ +#define DRBD_MIN_POOL_PAGES 128 +extern mempool_t *drbd_md_io_page_pool; + extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 12c9a704ea0..5f4c95905d5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -129,6 +129,7 @@ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; +mempool_t *drbd_md_io_page_pool; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. 
@@ -1952,6 +1953,8 @@ static void drbd_destroy_mempools(void) /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + if (drbd_md_io_page_pool) + mempool_destroy(drbd_md_io_page_pool); if (drbd_ee_mempool) mempool_destroy(drbd_ee_mempool); if (drbd_request_mempool) @@ -1965,6 +1968,7 @@ static void drbd_destroy_mempools(void) if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); + drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; drbd_request_mempool = NULL; drbd_ee_cache = NULL; @@ -1988,6 +1992,7 @@ static int drbd_create_mempools(void) drbd_bm_ext_cache = NULL; drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; + drbd_md_io_page_pool = NULL; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2011,6 +2016,10 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ + drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_page_pool == NULL) + goto Enomem; + drbd_request_mempool = mempool_create(number, mempool_alloc_slab, mempool_free_slab, drbd_request_cache); if (drbd_request_mempool == NULL) From 9db4e77f8cbbeeb32a4d2aea022c80333c445984 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 15:38:47 +0100 Subject: [PATCH 155/609] drbd: use the newly introduced page pool for bitmap IO Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 3791082979e..0009e40744a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -963,9 +963,8 @@ static void bm_async_io_complete(struct bio *bio, int error) bm_page_unlock_io(mdev, idx); - /* FIXME give back to page pool */ if (ctx->flags & BM_AIO_COPY_PAGES) - put_page(bio->bi_io_vec[0].bv_page); + mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); bio_put(bio); @@ -999,10 +998,8 @@ static void bm_page_io_async(struct 
bm_aio_ctx *ctx, int page_nr, int rw) __must bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - /* FIXME alloc_page is good enough for now, but actually needs - * to use pre-allocated page pool */ void *src, *dest; - page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); + page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); dest = kmap_atomic(page, KM_USER0); src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); memcpy(dest, src, PAGE_SIZE); @@ -1014,6 +1011,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; + /* bio_add_page of a single page to an empty bio will always succeed, + * according to api. Do we want to assert that? */ bio_add_page(bio, page, len, 0); bio->bi_private = ctx; bio->bi_end_io = bm_async_io_complete; From da4a75d2ef064501f6756986af6ea330ba0585d7 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 17:02:01 +0100 Subject: [PATCH 156/609] drbd: introduce a bio_set to allocate housekeeping bios from Don't rely on availability of bios from the global fs_bio_set, we should use our own bio_set for meta data IO. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_int.h | 6 ++++++ drivers/block/drbd/drbd_main.c | 28 ++++++++++++++++++++++++++++ drivers/block/drbd/drbd_receiver.c | 6 +++++- 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ea3895de4e6..7cd78617669 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -125,7 +125,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, rw |= REQ_FUA | REQ_FLUSH; rw |= REQ_SYNC; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; bio->bi_sector = sector; ok = (bio_add_page(bio, page, size, 0) == size); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 0009e40744a..52c48143b22 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -974,8 +974,7 @@ static void bm_async_io_complete(struct bio *bio, int error) static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - /* we are process context. 
we always get a bio */ - struct bio *bio = bio_alloc(GFP_KERNEL, 1); + struct bio *bio = bio_alloc_drbd(GFP_KERNEL); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2444a168347..e6875834464 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1489,6 +1489,12 @@ extern wait_queue_head_t drbd_pp_wait; #define DRBD_MIN_POOL_PAGES 128 extern mempool_t *drbd_md_io_page_pool; +/* We also need to make sure we get a bio + * when we need it for housekeeping purposes */ +extern struct bio_set *drbd_md_io_bio_set; +/* to allocate from that set */ +extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); + extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5f4c95905d5..997b2e21467 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -130,6 +130,7 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; mempool_t *drbd_md_io_page_pool; +struct bio_set *drbd_md_io_bio_set; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@ -150,6 +151,25 @@ static const struct block_device_operations drbd_ops = { .release = drbd_release, }; +static void bio_destructor_drbd(struct bio *bio) +{ + bio_free(bio, drbd_md_io_bio_set); +} + +struct bio *bio_alloc_drbd(gfp_t gfp_mask) +{ + struct bio *bio; + + if (!drbd_md_io_bio_set) + return bio_alloc(gfp_mask, 1); + + bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + if (!bio) + return NULL; + bio->bi_destructor = bio_destructor_drbd; + return bio; +} + #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. 
@@ -1953,6 +1973,8 @@ static void drbd_destroy_mempools(void) /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + if (drbd_md_io_bio_set) + bioset_free(drbd_md_io_bio_set); if (drbd_md_io_page_pool) mempool_destroy(drbd_md_io_page_pool); if (drbd_ee_mempool) @@ -1968,6 +1990,7 @@ static void drbd_destroy_mempools(void) if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); + drbd_md_io_bio_set = NULL; drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; drbd_request_mempool = NULL; @@ -1993,6 +2016,7 @@ static int drbd_create_mempools(void) drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; drbd_md_io_page_pool = NULL; + drbd_md_io_bio_set = NULL; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2016,6 +2040,10 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_bio_set == NULL) + goto Enomem; + drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); if (drbd_md_io_page_pool == NULL) goto Enomem; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cd78ebfefe5..6dcf65484c2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1124,7 +1124,11 @@ int drbd_submit_peer_request(struct drbd_conf *mdev, /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the - * request in more than one bio. */ + * request in more than one bio. + * + * Plain bio_alloc is good enough here, this is no DRBD internally + * generated bio, but a bio allocated on behalf of the peer. 
+ */ next_bio: bio = bio_alloc(GFP_NOIO, nr_pages); if (!bio) { From 569083c08dc16c043b4bdd473d41ff85a2b2df9e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 09:49:02 +0100 Subject: [PATCH 157/609] drbd: fix drbd_delete_device: remove vnr from volumes; idr_remove(); synchronize_rcu(); before cleanup Still missing: rcu_read_lock() on the various call sites that access/iterate over those idrs. We don't need a specific write lock, as we only modify from configuration context, which is already strictly serialized. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 42 +++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 997b2e21467..9f6db5947c6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2124,7 +2124,9 @@ void drbd_delete_device(unsigned int minor) if (!mdev) return; - idr_remove(&mdev->tconn->volumes, minor); + idr_remove(&mdev->tconn->volumes, mdev->vnr); + idr_remove(&minors, minor); + synchronize_rcu(); /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); @@ -2153,7 +2155,6 @@ void drbd_delete_device(unsigned int minor) * allocated from drbd_new_device * and actually free the mdev itself */ drbd_free_mdev(mdev); - idr_remove(&minors, minor); } static void drbd_cleanup(void) @@ -2331,15 +2332,6 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, return ERR_NOMEM; mdev->tconn = tconn; - if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) - goto out_no_idr; - if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) - goto out_no_idr; - if (vnr_got != vnr) { - dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); - goto out_no_q; - } - mdev->minor = minor; drbd_init_set_defaults(mdev); @@ -2395,19 +2387,35 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_no_vol_idr; + if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) + goto out_no_vol_idr; + if (vnr_got != vnr) { + dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); + goto out_idr_remove_vol; + } + if (!idr_pre_get(&minors, GFP_KERNEL)) - goto out_no_minor_idr; + goto out_idr_remove_vol; if (idr_get_new(&minors, mdev, &minor_got)) - goto out_no_minor_idr; + goto out_idr_remove_vol; if (minor_got != minor) { - idr_remove(&minors, minor_got); - goto out_no_minor_idr; + /* minor exists, or other idr strangeness? */ + dev_err(DEV, "available minor (%d) != requested minor (%d)\n", + minor_got, minor); + goto out_idr_remove_minor; } add_disk(disk); return NO_ERROR; -out_no_minor_idr: +out_idr_remove_minor: + idr_remove(&minors, minor_got); +out_idr_remove_vol: + idr_remove(&tconn->volumes, vnr_got); + synchronize_rcu(); +out_no_vol_idr: kfree(mdev->current_epoch); out_no_epoch: drbd_bm_cleanup(mdev); @@ -2418,8 +2426,6 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - idr_remove(&tconn->volumes, vnr_got); -out_no_idr: kfree(mdev); return ERR_NOMEM; } From 3cb7a2a90fe35eb3059e8860d0c6917eb414f791 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:00:58 +0100 Subject: [PATCH 158/609] drbd: get rid of drbd_bcast_ee, it is of no use anymore This function was used to broadcast the (leading part of the) bio payload in case we see a data integrity error. It could be received from userland with the drbdsetup events subcommand, to have a peek into the payload that caused the checksum mismatch, and guess from there what may have caused the mismatch, mainly to guess whether it was modification of in-flight data, or data corruption by broken hardware or software bugs. Meanwhile we support bios that are larger than the maximum payload a netlink datagram can carry.
And we have means to reliably detect modification of in-flight data by calculating, and comparing, the checksum before and after sendmsg. There is no need to carry this around anymore. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6dcf65484c2..1aace37c516 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1353,8 +1353,6 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); - drbd_bcast_ee(mdev, "digest failed", - dgs, dig_in, dig_vv, peer_req); drbd_free_ee(mdev, peer_req); return NULL; } From ec2c35ac1ea288f5c931e32452ecea50068e8450 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:20:08 +0100 Subject: [PATCH 159/609] drbd: prepare the transition from connector to genetlink This adds the new API header and helper files. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 349 +++++++++++++++++++++++++ include/linux/drbd_genl_api.h | 55 ++++ include/linux/genl_magic_func.h | 417 ++++++++++++++++++++++++++++++ include/linux/genl_magic_struct.h | 260 +++++++++++++++++++ 4 files changed, 1081 insertions(+) create mode 100644 include/linux/drbd_genl.h create mode 100644 include/linux/drbd_genl_api.h create mode 100644 include/linux/genl_magic_func.h create mode 100644 include/linux/genl_magic_struct.h diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h new file mode 100644 index 00000000000..84e16848f7a --- /dev/null +++ b/include/linux/drbd_genl.h @@ -0,0 +1,349 @@ +/* + * General overview: + * full generic netlink message: + * |nlmsghdr|genlmsghdr| + * + * payload: + * |optional fixed size family header| + * + * sequence of netlink attributes: + * I chose to have all "top level" attributes NLA_NESTED, + * corresponding to some real struct. + * So we have a sequence of |tla, len| + * + * nested nla sequence: + * may be empty, or contain a sequence of netlink attributes + * representing the struct fields. + * + * The tag number of any field (regardless of containing struct) + * will be available as T_ ## field_name, + * so you cannot have the same field name in two differnt structs. + * + * The tag numbers themselves are per struct, though, + * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, + * which we won't use here). + * The tag numbers are used as index in the respective nla_policy array. 
+ * + * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy + * genl_magic_struct.h + * generates the struct declaration, + * generates an entry in the tla enum, + * genl_magic_func.h + * generates an entry in the static tla policy + * with .type = NLA_NESTED + * generates the static _nl_policy definition, + * and static conversion functions + * + * genl_magic_func.h + * + * GENL_mc_group(group) + * genl_magic_struct.h + * does nothing + * genl_magic_func.h + * defines and registers the mcast group, + * and provides a send helper + * + * GENL_notification(op_name, op_num, mcast_group, tla list) + * These are notifications to userspace. + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * does nothing + * + * mcast group: the name of the mcast group this notification should be + * expected on + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + * + * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" + * These are requests from userspace. + * + * _op and _notification share the same "number space", + * op_nr will be assigned to "genlmsghdr->cmd" + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * generates an entry in the static genl_ops array, + * and static register/unregister functions to + * genl_register_family_with_ops(). + * + * flags and handler: + * GENL_op_init( .doit = x, .dumpit = y, .flags = something) + * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + */ + +/* + * STRUCTS + */ + +/* this is sent kernel -> userland on various error conditions, and contains + * informational textual info, which is supposedly human readable. + * The computer relevant return code is in the drbd_genlmsghdr. 
+ */ +GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, + /* "arbitrary" size strings, nla_policy.len = 0 */ + __str_field(1, GENLA_F_MANDATORY, info_text, 0) +) + +/* Configuration requests typically need a context to operate on. + * Possible keys are device minor (fits in the drbd_genlmsghdr), + * the replication link (aka connection) name, + * and/or the replication group (aka resource) name, + * and the volume id within the resource. */ +GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, + /* currently only 256 volumes per group, + * but maybe we still change that */ + __u32_field(1, GENLA_F_MANDATORY, ctx_volume) + __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) +) + +GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, + __u64_field(1, GENLA_F_MANDATORY, disk_size) + __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) + __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) + __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) + __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) + __u32_field(7, GENLA_F_MANDATORY, fencing) + __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(11, GENLA_F_MANDATORY, no_md_flush) + __flg_field(12, GENLA_F_MANDATORY, use_bmbv) +) + +GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, + __u32_field(1, GENLA_F_MANDATORY, rate) + __u32_field(2, GENLA_F_MANDATORY, after) + __u32_field(3, GENLA_F_MANDATORY, al_extents) + __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) + __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __flg_field(7, GENLA_F_MANDATORY, use_rle) + __u32_field(8, GENLA_F_MANDATORY, on_no_data) + __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(10, GENLA_F_MANDATORY, c_delay_target) + __u32_field(11, GENLA_F_MANDATORY, c_fill_target) + __u32_field(12, GENLA_F_MANDATORY, 
c_max_rate) + __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +) + +GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, + __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + shared_secret, SHARED_SECRET_MAX) + __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(4, GENLA_F_REQUIRED, my_addr, 128) + __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) + __u32_field(6, GENLA_F_REQUIRED, wire_protocol) + __u32_field(7, GENLA_F_MANDATORY, try_connect_int) + __u32_field(8, GENLA_F_MANDATORY, timeout) + __u32_field(9, GENLA_F_MANDATORY, ping_int) + __u32_field(10, GENLA_F_MANDATORY, ping_timeo) + __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(13, GENLA_F_MANDATORY, ko_count) + __u32_field(14, GENLA_F_MANDATORY, max_buffers) + __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(17, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(20, GENLA_F_MANDATORY, rr_conflict) + __u32_field(21, GENLA_F_MANDATORY, on_congestion) + __u32_field(22, GENLA_F_MANDATORY, cong_fill) + __u32_field(23, GENLA_F_MANDATORY, cong_extents) + __flg_field(24, GENLA_F_MANDATORY, two_primaries) + __flg_field(25, GENLA_F_MANDATORY, want_lose) + __flg_field(26, GENLA_F_MANDATORY, no_cork) + __flg_field(27, GENLA_F_MANDATORY, always_asbp) + __flg_field(28, GENLA_F_MANDATORY, dry_run) +) + +GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, + __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) +) + +GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, + __u64_field(1, GENLA_F_MANDATORY, resize_size) + __flg_field(2, GENLA_F_MANDATORY, resize_force) + __flg_field(3, GENLA_F_MANDATORY, no_resync) +) + +GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, + /* the reason of the broadcast, + * if 
this is an event triggered broadcast. */ + __u32_field(1, GENLA_F_MANDATORY, sib_reason) + __u32_field(2, GENLA_F_REQUIRED, current_state) + __u64_field(3, GENLA_F_MANDATORY, capacity) + __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + + /* These are for broadcast from after state change work. + * prev_state and new_state are from the moment the state change took + * place, new_state is not neccessarily the same as current_state, + * there may have been more state changes since. Which will be + * broadcasted soon, in their respective after state change work. */ + __u32_field(5, GENLA_F_MANDATORY, prev_state) + __u32_field(6, GENLA_F_MANDATORY, new_state) + + /* if we have a local disk: */ + __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, GENLA_F_MANDATORY, disk_flags) + __u64_field(9, GENLA_F_MANDATORY, bits_total) + __u64_field(10, GENLA_F_MANDATORY, bits_oos) + /* and in case resync or online verify is active */ + __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + + /* for pre and post notifications of helper execution */ + __str_field(13, GENLA_F_MANDATORY, helper, 32) + __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) +) + +GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, + __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) +) + +GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, + __flg_field(1, GENLA_F_MANDATORY, clear_bm) +) + +GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, + __u32_field(1, GENLA_F_REQUIRED, timeout_type) +) + +GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, + __flg_field(1, GENLA_F_MANDATORY, force_disconnect) +) + +/* + * Notifications and commands (genlmsghdr->cmd) + */ +GENL_mc_group(events) + + /* kernel -> userspace announcement of changes */ +GENL_notification( + DRBD_EVENT, 1, events, + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) + 
GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + + /* query kernel for specific or all info */ +GENL_op( + DRBD_ADM_GET_STATUS, 2, + GENL_op_init( + .doit = drbd_adm_get_status, + .dumpit = drbd_adm_get_status_all, + /* anyone may ask for the status, + * it is broadcasted anyways */ + ), + /* To select the object .doit. + * Or a subset of objects in .dumpit. */ + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) +) + +#if 0 + /* TO BE DONE */ + /* create or destroy resources, aka replication groups */ +GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +#endif + + /* add DRBD minor devices as volumes to resources */ +GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* add or delete replication links to resources */ +GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on replication links */ +GENL_op(DRBD_ADM_SYNCER, 9, + GENL_doit(drbd_adm_syncer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_CONNECT, 10, + GENL_doit(drbd_adm_connect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + +GENL_op(DRBD_ADM_DISCONNECT, 11, 
GENL_doit(drbd_adm_disconnect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on minors */ +GENL_op(DRBD_ADM_ATTACH, 12, + GENL_doit(drbd_adm_attach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_RESIZE, 13, + GENL_doit(drbd_adm_resize), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) +) + + /* operates on all volumes within a resource */ +GENL_op( + DRBD_ADM_PRIMARY, 14, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_SECONDARY, 15, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_NEW_C_UUID, 16, + GENL_doit(drbd_adm_new_c_uuid), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_START_OV, 17, + GENL_doit(drbd_adm_start_ov), + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) +) + +GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), + 
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h new file mode 100644 index 00000000000..9ef50d51e34 --- /dev/null +++ b/include/linux/drbd_genl_api.h @@ -0,0 +1,55 @@ +#ifndef DRBD_GENL_STRUCT_H +#define DRBD_GENL_STRUCT_H + +/** + * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests + * @minor: + * For admin requests (user -> kernel): which minor device to operate on. + * For (unicast) replies or informational (broadcast) messages + * (kernel -> user): which minor device the information is about. + * If we do not operate on minors, but on connections or resources, + * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT + * is used instead. + * @flags: possible operation modifiers (relevant only for user->kernel): + * DRBD_GENL_F_SET_DEFAULTS + * @volume: + * When creating a new minor (adding it to a resource), the resource needs + * to know which volume number within the resource this is supposed to be. + * The volume number corresponds to the same volume number on the remote side, + * whereas the minor number on the remote side may be different + * (union with flags). 
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags); + */ +struct drbd_genlmsghdr { + __u32 minor; + union { + __u32 flags; + __s32 ret_code; + }; +}; + +/* To be used in drbd_genlmsghdr.flags */ +enum { + DRBD_GENL_F_SET_DEFAULTS = 1, +}; + +enum drbd_state_info_bcast_reason { + SIB_GET_STATUS_REPLY = 1, + SIB_STATE_CHANGE = 2, + SIB_HELPER_PRE = 3, + SIB_HELPER_POST = 4, + SIB_SYNC_PROGRESS = 5, +}; + +/* hack around predefined gcc/cpp "linux=1", + * we cannot possibly include <1/drbd_genl.h> */ +#undef linux + +#include +#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_FAMILY drbd +#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) +#define GENL_MAGIC_INCLUDE_FILE +#include + +#endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h new file mode 100644 index 00000000000..8a86f659d36 --- /dev/null +++ b/include/linux/genl_magic_func.h @@ -0,0 +1,417 @@ +#ifndef GENL_MAGIC_FUNC_H +#define GENL_MAGIC_FUNC_H + +#include + +/* + * Extension of genl attribute validation policies {{{1 + * {{{2 + */ + +/** + * nla_is_required - return true if this attribute is required + * @nla: netlink attribute + */ +static inline int nla_is_required(const struct nlattr *nla) +{ + return nla->nla_type & GENLA_F_REQUIRED; +} + +/** + * nla_is_mandatory - return true if understanding this attribute is mandatory + * @nla: netlink attribute + * Note: REQUIRED attributes are implicitly MANDATORY as well + */ +static inline int nla_is_mandatory(const struct nlattr *nla) +{ + return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); +} + +/* Functionality to be integrated into nla_parse(), and validate_nla(), + * respectively. + * + * Enforcing the "mandatory" bit is done here, + * by rejecting unknown mandatory attributes. 
+ * + * Part of enforcing the "required" flag would mean to embed it into + * nla_policy.type, and extending validate_nla(), which currently does + * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, + * so we cannot do that. Thats why enforcing "required" is done in the + * generated assignment functions below. */ +static int nla_check_unknown(int maxtype, struct nlattr *head, int len) +{ + struct nlattr *nla; + int rem; + nla_for_each_attr(nla, head, len, rem) { + __u16 type = nla_type(nla); + if (type > maxtype && nla_is_mandatory(nla)) + return -EOPNOTSUPP; + } + return 0; +} + +/* + * Magic: declare tla policy {{{1 + * Magic: declare nested policies + * {{{2 + */ +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + [tag_name] = { .type = NLA_NESTED }, + +static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ +{ s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type }, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ + __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, + +#include GENL_MAGIC_INCLUDE_FILE + +#ifndef __KERNEL__ +#ifndef pr_info +#define pr_info(args...) fprintf(stderr, args); +#endif +#endif + +#if 1 +static void dprint_field(const char *dir, int nla_type, + const char *name, void *valp) +{ + __u64 val = valp ? 
*(__u32 *)valp : 1; + switch (nla_type) { + case NLA_U8: val = (__u8)val; + case NLA_U16: val = (__u16)val; + case NLA_U32: val = (__u32)val; + pr_info("%s attr %s: %d 0x%08x\n", dir, + name, (int)val, (unsigned)val); + break; + case NLA_U64: + val = *(__u64*)valp; + pr_info("%s attr %s: %lld 0x%08llx\n", dir, + name, (long long)val, (unsigned long long)val); + break; + case NLA_FLAG: + if (val) + pr_info("%s attr %s: set\n", dir, name); + break; + } +} + +static void dprint_array(const char *dir, int nla_type, + const char *name, const char *val, unsigned len) +{ + switch (nla_type) { + case NLA_NUL_STRING: + if (len && val[len-1] == '\0') + len--; + pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); + break; + default: + /* we can always show 4 byte, + * thats what nlattr are aligned to. */ + pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", + dir, name, len, val[0], val[1], val[2], val[3]); + } +} + +#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); + +/* Name is a member field name of the struct s. + * If s is NULL (only parsing, no copy requested in *_from_attrs()), + * nla is supposed to point to the attribute containing the information + * corresponding to that struct member. */ +#define DPRINT_FIELD(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_field(dir, nla_type, #name, &s->name); \ + else if (nla) \ + dprint_field(dir, nla_type, #name, \ + (nla_type == NLA_FLAG) ? 
NULL \ + : nla_data(nla)); \ + } while (0) + +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_array(dir, nla_type, #name, \ + s->name, s->name ## _len); \ + else if (nla) \ + dprint_array(dir, nla_type, #name, \ + nla_data(nla), nla_len(nla)); \ + } while (0) +#else +#define DPRINT_TLA(a, op, b) do {} while (0) +#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) +#endif + +/* + * Magic: provide conversion functions {{{1 + * populate struct from attribute table: + * {{{2 + */ + +/* processing of generic netlink messages is serialized. + * use one static buffer for parsing of nested attributes */ +static struct nlattr *nested_attr_tb[128]; + +#ifndef BUILD_BUG_ON +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). 
*/ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#endif + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + /* static, potentially unused */ \ +int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +{ \ + const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ + struct nlattr *tla = tb[tag_number]; \ + struct nlattr **ntb = nested_attr_tb; \ + struct nlattr *nla; \ + int err; \ + BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \ + if (!tla) \ + return -ENOMSG; \ + DPRINT_TLA(#s_name, "<=-", #tag_name); \ + err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + if (err) \ + return err; \ + err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ + if (err) \ + return err; \ + \ + s_fields \ + return 0; \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } + +/* validate_nla() already checked nla_len <= maxlen appropriately. 
*/ +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name ## _len = \ + __get(s->name, nla, maxlen); \ + DPRINT_ARRAY("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } \ + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +/* + * Magic: define op number to op name mapping {{{1 + * {{{2 + */ +const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +{ + switch (cmd) { +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + case op_num: return #op_name; +#include GENL_MAGIC_INCLUDE_FILE + default: + return "unknown"; + } +} + +#ifdef __KERNEL__ +#include +/* + * Magic: define genl_ops {{{1 + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ +{ \ + handler \ + .cmd = op_name, \ + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ +}, + +#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +static struct genl_ops ZZZ_genl_ops[] __read_mostly = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +/* + * Define the genl_family, multicast groups, {{{1 + * and provide register/unregister functions. 
+ * {{{2 + */ +#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +static struct genl_family ZZZ_genl_family __read_mostly = { + .id = GENL_ID_GENERATE, + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, +}; + +/* + * Magic: define multicast groups + * Magic: define multicast group registration helper + */ +#undef GENL_mc_group +#define GENL_mc_group(group) \ +static struct genl_multicast_group \ +CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \ + .name = #group, \ +}; \ +static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ + struct sk_buff *skb, gfp_t flags) \ +{ \ + unsigned int group_id = \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \ + if (!group_id) \ + return -EINVAL; \ + return genlmsg_multicast(skb, 0, group_id, flags); \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +{ + int err = genl_register_family_with_ops(&ZZZ_genl_family, + ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); + if (err) + return err; +#undef GENL_mc_group +#define GENL_mc_group(group) \ + err = genl_register_mc_group(&ZZZ_genl_family, \ + &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \ + if (err) \ + goto fail; \ + else \ + pr_info("%s: mcg %s: %u\n", #group, \ + __stringify(GENL_MAGIC_FAMILY), \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_mc_group +#define GENL_mc_group(group) + return 0; +fail: + genl_unregister_family(&ZZZ_genl_family); + return err; +} + +void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +{ + genl_unregister_family(&ZZZ_genl_family); +} + +/* + * Magic: provide conversion functions {{{1 + * populate skb from struct. 
+ * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \ + const bool exclude_sensitive) \ +{ \ + struct nlattr *tla = nla_nest_start(skb, tag_number); \ + if (!tla) \ + goto nla_put_failure; \ + DPRINT_TLA(#s_name, "-=>", #tag_name); \ + s_fields \ + nla_nest_end(skb, tla); \ + return 0; \ + \ +nla_put_failure: \ + if (tla) \ + nla_nest_cancel(skb, tla); \ + return -EMSGSIZE; \ +} \ +static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 0); \ +} \ +static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 1); \ +} + + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_FIELD(">>", nla_type, name, s, NULL); \ + __put(skb, attr_nr, s->name); \ + } + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ + __put(skb, attr_nr, min_t(int, maxlen, \ + s->name ## _len + (nla_type == NLA_NUL_STRING)),\ + s->name); \ + } + +#include GENL_MAGIC_INCLUDE_FILE + +#endif /* __KERNEL__ */ + +/* }}}1 */ +#endif /* GENL_MAGIC_FUNC_H */ +/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h new file mode 100644 index 00000000000..745ebfd6c7e --- /dev/null +++ b/include/linux/genl_magic_struct.h @@ -0,0 +1,260 @@ +#ifndef GENL_MAGIC_STRUCT_H +#define GENL_MAGIC_STRUCT_H + +#ifndef GENL_MAGIC_FAMILY +# error "you need to define GENL_MAGIC_FAMILY before inclusion" +#endif + +#ifndef GENL_MAGIC_VERSION +# 
error "you need to define GENL_MAGIC_VERSION before inclusion" +#endif + +#ifndef GENL_MAGIC_INCLUDE_FILE +# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" +#endif + +#include +#include + +#define CONCAT__(a,b) a ## b +#define CONCAT_(a,b) CONCAT__(a,b) + +extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); + +/* + * Extension of genl attribute validation policies {{{2 + */ + +/** + * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement + * + * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. + * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. + * + * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. + * Without this, unknown attributes (> maxtype) are _silently_ ignored + * by validate_nla(). + * + * To be used for API extensions, so older kernel can reject requests for not + * yet implemented features, if newer userland tries to use them even though + * the genl_family version clearly indicates they are not available. + * + * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. + * To be used for API extensions for things that have sane defaults, + * so newer userland can still talk to older kernel, knowing it will + * silently ignore these attributes if not yet known. + * + * NOTE: These flags overload + * NLA_F_NESTED (1 << 15) + * NLA_F_NET_BYTEORDER (1 << 14) + * from linux/netlink.h, which are not useful for validate_nla(): + * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting + * .type = NLA_NESTED in the appropriate policy. + * + * See also: nla_type() + */ +enum { + GENLA_F_MAY_IGNORE = 0, + GENLA_F_MANDATORY = 1 << 14, + GENLA_F_REQUIRED = 1 << 15, + + /* This will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb, to exclude "sensitive" + * information from broadcasts, or on unpriviledged get requests. 
+ * This is useful because genetlink multicast groups can be listened in + * on by anyone. */ + GENLA_F_SENSITIVE = 1 << 16, +}; + +#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) + +/* }}}1 + * MAGIC + * multi-include macro expansion magic starts here + */ + +/* MAGIC helpers {{{2 */ + +/* possible field types */ +#define __flg_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ + nla_get_flag, __nla_put_flag) +#define __u8_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ + nla_get_u8, NLA_PUT_U8) +#define __u16_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ + nla_get_u16, NLA_PUT_U16) +#define __u32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ + nla_get_u32, NLA_PUT_U32) +#define __u64_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ + nla_get_u64, NLA_PUT_U64) +#define __str_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ + nla_strlcpy, NLA_PUT) +#define __bin_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ + nla_memcpy, NLA_PUT) + +#define __nla_put_flag(skb, attrtype, value) \ + do { \ + if (value) \ + NLA_PUT_FLAG(skb, attrtype); \ + } while (0) + +#define GENL_op_init(args...) 
args +#define GENL_doit(handler) \ + .doit = handler, \ + .flags = GENL_ADMIN_PERM, +#define GENL_dumpit(handler) \ + .dumpit = handler, \ + .flags = GENL_ADMIN_PERM, + +/* }}}1 + * Magic: define the enum symbols for genl_ops + * Magic: define the enum symbols for top level attributes + * Magic: define the enum symbols for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + op_name = op_num, + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + op_name = op_num, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + tag_name = tag_number, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: compile time assert unique numbers for operations + * Magic: -"- unique numbers for top level attributes + * Magic: -"- unique numbers for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) \ + case op_name: + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + case op_name: + +static 
inline void ct_assert_unique_operations(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + case tag_number: + +static inline void ct_assert_unique_top_level_attributes(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ +{ \ + switch (0) { \ + s_fields \ + ; \ + } \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + case attr_nr: + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + case attr_nr: + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: declare structs + * struct { + * fields + * }; + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +struct s_name { s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + type name; + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + type name[maxlen]; \ + __u32 name ## _len; + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 */ +#endif /* GENL_MAGIC_STRUCT_H */ +/* vim: set foldmethod=marker nofoldenable : */ From 9f2247bb9b75b2be65a12167c89271121b2c90c5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 16 Aug 2012 14:25:58 +0200 Subject: [PATCH 160/609] drbd: Protect accesses to the uuid set with a spinlock There is at least the worker context, the receiver context, the context of receiving netlink packts and processes reading a sysfs attribute that access the uuids. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 4 +++- drivers/block/drbd/drbd_main.c | 33 ++++++++++++++++++++++++------ drivers/block/drbd/drbd_nl.c | 5 +++++ drivers/block/drbd/drbd_receiver.c | 11 +++++++--- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b953cc7c9c0..9a6d3a4a739 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -912,6 +912,7 @@ struct drbd_md { u64 md_offset; /* sector offset to 'super' block */ u64 la_size_sect; /* last agreed size, unit sectors */ + spinlock_t uuid_lock; u64 uuid[UI_SIZE]; u64 device_uuid; u32 flags; @@ -1283,8 +1284,9 @@ extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); -extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local); +extern void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local); +extern void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); extern int drbd_md_test_flag(struct drbd_backing_dev *, int); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f55683ad4ff..dfa08b7411c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2125,8 +2125,10 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) if (!get_ldev_if_state(mdev, D_NEGOTIATING)) return 1; + 
spin_lock_irq(&mdev->ldev->md.uuid_lock); for (i = UI_CURRENT; i < UI_SIZE; i++) p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; + spin_unlock_irq(&mdev->ldev->md.uuid_lock); mdev->comm_bm_set = drbd_bm_total_weight(mdev); p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); @@ -4011,7 +4013,7 @@ void drbd_md_mark_dirty(struct drbd_conf *mdev) } #endif -static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) +void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; @@ -4019,7 +4021,7 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; } -void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) { if (idx == UI_CURRENT) { if (mdev->state.role == R_PRIMARY) @@ -4034,14 +4036,24 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) drbd_md_mark_dirty(mdev); } +void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +{ + unsigned long flags; + spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); + __drbd_uuid_set(mdev, idx, val); + spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); +} void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) { + unsigned long flags; + spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); if (mdev->ldev->md.uuid[idx]) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; } - _drbd_uuid_set(mdev, idx, val); + __drbd_uuid_set(mdev, idx, val); + spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); } /** @@ -4054,15 +4066,20 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) { u64 val; - unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + unsigned long long 
bm_uuid; + + get_random_bytes(&val, sizeof(u64)); + + spin_lock_irq(&mdev->ldev->md.uuid_lock); + bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; if (bm_uuid) dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; + __drbd_uuid_set(mdev, UI_CURRENT, val); + spin_unlock_irq(&mdev->ldev->md.uuid_lock); - get_random_bytes(&val, sizeof(u64)); - _drbd_uuid_set(mdev, UI_CURRENT, val); drbd_print_uuids(mdev, "new current UUID"); /* get it to stable storage _now_ */ drbd_md_sync(mdev); @@ -4070,9 +4087,11 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) { + unsigned long flags; if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) return; + spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); if (val == 0) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; @@ -4084,6 +4103,8 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); } + spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); + drbd_md_mark_dirty(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index edb490aad8b..ab660556a00 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -977,6 +977,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->dc.fencing = DRBD_FENCING_DEF; nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; + spin_lock_init(&nbc->md.uuid_lock); + if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { retcode = ERR_MANDATORY_TAG; goto fail; @@ -2170,8 +2172,11 @@ static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp tl = reply->tag_list; if (get_ldev(mdev)) { + unsigned long flags; + spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, 
UI_SIZE*sizeof(u64)); tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); + spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); put_ldev(mdev); } put_unaligned(TT_END, tl++); /* Close the tag list */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c74ca2df743..434adf75259 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2392,7 +2392,9 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n"); - drbd_uuid_set_bm(mdev, 0UL); + drbd_uuid_move_history(mdev); + mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; + mdev->ldev->md.uuid[UI_BITMAP] = 0; drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->state.disk >= D_NEGOTIATING ? 
drbd_bm_total_weight(mdev) : 0, 0); @@ -2500,8 +2502,8 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if (mdev->agreed_pro_version < 91) return -1091; - _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); - _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); + __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); + __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); dev_info(DEV, "Last syncUUID did not get through, corrected:\n"); drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, @@ -2554,11 +2556,14 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol mydisk = mdev->new_state_tmp.disk; dev_info(DEV, "drbd_sync_handshake:\n"); + + spin_lock_irq(&mdev->ldev->md.uuid_lock); drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); hg = drbd_uuid_compare(mdev, &rule_nr); + spin_unlock_irq(&mdev->ldev->md.uuid_lock); dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr); From 02b91b55260f7a1bdc8da25866cf27f726f5788f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 28 Jun 2012 18:26:52 +0200 Subject: [PATCH 161/609] drbd: introduce stop-sector to online verify We now can schedule only a specific range of sectors for online verify, or interrupt a running verify without interrupting the connection. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 17 +++++++++++---- drivers/block/drbd/drbd_nl.c | 11 ++++++---- drivers/block/drbd/drbd_proc.c | 12 ++++++++--- drivers/block/drbd/drbd_receiver.c | 8 ++++++++ drivers/block/drbd/drbd_worker.c | 33 ++++++++++++++++++++++++------ include/linux/drbd.h | 2 +- include/linux/drbd_nl.h | 1 + 8 files changed, 67 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9a6d3a4a739..3cce7357402 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1052,6 +1052,7 @@ struct drbd_conf { /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; + sector_t ov_stop_sector; /* where are we now? (sector) */ sector_t ov_position; /* Start sector of out of sync range (to merge printk reporting). */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dfa08b7411c..df9965d820c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1231,13 +1231,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); - /* aborted verify run. log the last position */ + /* Aborted verify run, or we reached the stop sector. + * Log the last position, unless end-of-device. 
*/ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { + ns.conn <= C_CONNECTED) { mdev->ov_start_sector = BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); + if (mdev->ov_left) + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); } if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && @@ -1703,6 +1705,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) drbd_send_state(mdev, ns); + /* Verify finished, or reached stop sector. Peer did not know about + * the stop sector, and we may even have changed the stop sector during + * verify to interrupt/stop early. Send the new state. */ + if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED + && mdev->agreed_pro_version >= 97) + drbd_send_state(mdev, ns); + /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { clear_bit(STATE_SENT, &mdev->flags); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ab660556a00..e2d368f1747 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2211,8 +2211,10 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { /* default to resume from last known position, if possible */ - struct start_ov args = - { .start_sector = mdev->ov_start_sector }; + struct start_ov args = { + .start_sector = mdev->ov_start_sector, + .stop_sector = ULLONG_MAX, + }; if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { reply->ret_code = ERR_MANDATORY_TAG; @@ -2224,8 +2226,9 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, drbd_suspend_io(mdev); wait_event(mdev->misc_wait, 
!test_bit(BITMAP_IO, &mdev->flags)); - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; + /* w_make_ov_request expects start position to be aligned */ + mdev->ov_start_sector = args.start_sector & ~(BM_SECT_PER_BIT-1); + mdev->ov_stop_sector = args.stop_sector; reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); drbd_resume_io(mdev); return 0; diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 5496104f90b..a5a453b4355 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) * we convert to sectors in the display below. */ unsigned long bm_bits = drbd_bm_bits(mdev); unsigned long bit_pos; + unsigned long long stop_sector = 0; if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) + mdev->state.conn == C_VERIFY_T) { bit_pos = bm_bits - mdev->ov_left; - else + if (mdev->agreed_pro_version >= 97) + stop_sector = mdev->ov_stop_sector; + } else bit_pos = mdev->bm_resync_fo; /* Total sectors may be slightly off for oddly * sized devices. So what. */ seq_printf(seq, - "\t%3d%% sector pos: %llu/%llu\n", + "\t%3d%% sector pos: %llu/%llu", (int)(bit_pos / (bm_bits/100+1)), (unsigned long long)bit_pos * BM_SECT_PER_BIT, (unsigned long long)bm_bits * BM_SECT_PER_BIT); + if (stop_sector != 0 && stop_sector != ULLONG_MAX) + seq_printf(seq, " stop sector: %llu", stop_sector); + seq_printf(seq, "\n"); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 434adf75259..280735da196 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3255,6 +3255,14 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } } + /* explicit verify finished notification, stop sector reached. 
*/ + if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && + peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { + ov_oos_print(mdev); + drbd_resync_finished(mdev); + return true; + } + /* peer says his disk is inconsistent, while we think it is uptodate, * and this happens while the peer still thinks we have a sync going on, * but we think we are already done with the sync. diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6bce2cc179d..1352455dd7d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -691,6 +691,7 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + bool stop_sector_reached = false; if (unlikely(cancel)) return 1; @@ -699,9 +700,17 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca sector = mdev->ov_position; for (i = 0; i < number; i++) { - if (sector >= capacity) { + if (sector >= capacity) return 1; - } + + /* We check for "finished" only in the reply path: + * w_e_end_ov_reply(). + * We need to send at least one request out. 
*/ + stop_sector_reached = i > 0 + && mdev->agreed_pro_version >= 97 + && sector >= mdev->ov_stop_sector; + if (stop_sector_reached) + break; size = BM_BLOCK_SIZE; @@ -725,7 +734,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca requeue: mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); - mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + if (i == 0 || !stop_sector_reached) + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); return 1; } @@ -808,7 +818,12 @@ int drbd_resync_finished(struct drbd_conf *mdev) dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; if (dt <= 0) dt = 1; + db = mdev->rs_total; + /* adjust for verify start and stop sectors, respective reached position */ + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + db -= mdev->ov_left; + dbdt = Bit2KB(db/dt); mdev->rs_paused /= HZ; @@ -831,7 +846,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns.conn = C_CONNECTED; dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", - verify_done ? "Online verify " : "Resync", + verify_done ? 
"Online verify" : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); n_oos = drbd_bm_total_weight(mdev); @@ -912,7 +927,9 @@ out: mdev->rs_total = 0; mdev->rs_failed = 0; mdev->rs_paused = 0; - if (verify_done) + + /* reset start sector, if we reached end of device */ + if (verify_done && mdev->ov_left == 0) mdev->ov_start_sector = 0; drbd_md_sync(mdev); @@ -1158,6 +1175,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) unsigned int size = e->size; int digest_size; int ok, eq = 0; + bool stop_sector_reached = false; if (unlikely(cancel)) { drbd_free_ee(mdev, e); @@ -1208,7 +1226,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if ((mdev->ov_left & 0x200) == 0x200) drbd_advance_rs_marks(mdev, mdev->ov_left); - if (mdev->ov_left == 0) { + stop_sector_reached = mdev->agreed_pro_version >= 97 && + (sector + (size>>9)) >= mdev->ov_stop_sector; + + if (mdev->ov_left == 0 || stop_sector_reached) { ov_oos_print(mdev); drbd_resync_finished(mdev); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 47e3d485058..4a7eccbd129 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.13" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 97 enum drbd_io_error_p { diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index a8706f08ab3..f6a576df19e 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -145,6 +145,7 @@ NL_PACKET(dump_ee, 24, NL_PACKET(start_ov, 25, NL_INT64( 66, T_MAY_IGNORE, start_sector) + NL_INT64( 90, T_MANDATORY, stop_sector) ) NL_PACKET(new_c_uuid, 26, From c12a3d8c84a5e9913a97ca5e6513c913a7e5b288 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 3 Aug 2012 15:14:04 +0200 Subject: [PATCH 162/609] drbd: Fix a potential issue with the DISCARD_CONCURRENT flag The DISCARD_CONCURRENT flag should be set on one 
node and cleared on the other node. As the code was before, it was theoretically possible that a node accepts the meta socket, but has to close it later on, and keeps the DISCARD_CONCURRENT flag. Correct this by moving the clear_bit(DISCARD_CONCURRENT) where the packet gets sent. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 280735da196..55c359a1a05 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -763,8 +763,6 @@ static int drbd_connect(struct drbd_conf *mdev) if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; - clear_bit(DISCARD_CONCURRENT, &mdev->flags); - sock = NULL; msock = NULL; @@ -784,6 +782,7 @@ static int drbd_connect(struct drbd_conf *mdev) sock = s; s = NULL; } else if (!msock) { + clear_bit(DISCARD_CONCURRENT, &mdev->flags); drbd_send_fp(mdev, s, P_HAND_SHAKE_M); msock = s; s = NULL; From 599377acb7cf3e1bdec13285096adac7ebaaaac5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 17 Aug 2012 14:50:22 +0200 Subject: [PATCH 163/609] drbd: Avoid NetworkFailure state during disconnect Disconnecting is a cluster wide state change. In case the peer node agrees to the state transition, it sends back the fact on the meta-data connection and closes both sockets. In case the node that initiated the state transfer sees the closing action on the data-socket, before the P_STATE_CHG_REPLY packet, it was going into one of the network failure states. At least with the fencing option set to something else than "dont-care", the unclean shutdown of the connection causes a short IO freeze or a fence operation.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 3 +++ drivers/block/drbd/drbd_receiver.c | 21 ++++++++++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3cce7357402..3b378124bac 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -857,6 +857,7 @@ enum { * so shrink_page_list() would not recurse into, * and potentially deadlock on, this drbd worker. */ + DISCONNECT_SENT, /* Currently the last bit in this 32bit word */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index df9965d820c..7b48653d1c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -659,6 +659,9 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } + if (mask.conn == C_MASK && val.conn == C_DISCONNECTING) + set_bit(DISCONNECT_SENT, &mdev->flags); + wait_event(mdev->state_wait, (rv = _req_st_cond(mdev, mask, val))); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 55c359a1a05..64e6a619241 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -534,7 +534,6 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) dev_err(DEV, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - dev_info(DEV, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -547,9 +546,21 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(oldfs); + if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &mdev->flags)) { + long t; /* time_left */ + t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED, + mdev->net_conf->ping_timeo * HZ/10); + if (t) + goto out; + } + dev_info(DEV, "sock 
was shut down by peer\n"); + } + if (rv != size) drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); +out: return rv; } @@ -760,6 +771,7 @@ static int drbd_connect(struct drbd_conf *mdev) D_ASSERT(!mdev->data.socket); + clear_bit(DISCONNECT_SENT, &mdev->flags); if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; @@ -4680,6 +4692,13 @@ int drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &mdev->flags)) { + long t; /* time_left */ + t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED, + mdev->net_conf->ping_timeo * HZ/10); + if (t) + break; + } dev_err(DEV, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { From dbd0820c6f7b7db9a97d63ea379fc174a63ddbca Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 17 Aug 2012 16:55:47 +0200 Subject: [PATCH 164/609] drbd: Remove dead code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 36 ++++++------------------------ 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 64e6a619241..264ea25a9b0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -516,37 +516,15 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) oldfs = get_fs(); set_fs(KERNEL_DS); - - for (;;) { - rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); - if (rv == size) - break; - - /* Note: - * ECONNRESET other side closed the connection - * ERESTARTSYS (on sock) we got a signal - */ - - if (rv < 0) { - if (rv == -ECONNRESET) - dev_info(DEV, "sock was reset by peer\n"); - else if (rv != -ERESTARTSYS) - dev_err(DEV, "sock_recvmsg returned %d\n", rv); - break; - } else if (rv == 0) { - break; - } else { - /* signal came in, or peer/link went down, - * after we read a partial 
message - */ - /* D_ASSERT(signal_pending(current)); */ - break; - } - }; - + rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); set_fs(oldfs); - if (rv == 0) { + if (rv < 0) { + if (rv == -ECONNRESET) + dev_info(DEV, "sock was reset by peer\n"); + else if (rv != -ERESTARTSYS) + dev_err(DEV, "sock_recvmsg returned %d\n", rv); + } else if (rv == 0) { if (test_bit(DISCONNECT_SENT, &mdev->flags)) { long t; /* time_left */ t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED, From 7fb907c15fb8d0e10e72c8566a13f6defab3f484 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 3 Sep 2012 15:48:21 +0200 Subject: [PATCH 165/609] drbd: panic on delayed completion of aborted requests "aborting" requests, or force-detaching the disk, is intended for completely blocked/hung local backing devices which no longer complete requests at all, not even error completions. In this situation, usually a hard-reset and failover is the only way out. By "aborting", basically faking a local error-completion, we allow for a more graceful switchover by cleanly migrating services. Still the affected node has to be rebooted "soon". By completing these requests, we allow the upper layers to re-use the associated data pages. If later the local backing device "recovers", and now DMAs some data from disk into the original request pages, in the best case it will just put random data into unused pages; but typically it will corrupt meanwhile completely unrelated data, causing all sorts of damage. Which means delayed successful completion, especially for READ requests, is a reason to panic(). We assume that a delayed *error* completion is OK, though we still will complain noisily about it.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_worker.c | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1352455dd7d..66dcb2d7ead 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -227,6 +227,42 @@ void drbd_endio_pri(struct bio *bio, int error) error = -EIO; } + /* If this request was aborted locally before, + * but now was completed "successfully", + * chances are that this caused arbitrary data corruption. + * + * "aborting" requests, or force-detaching the disk, is intended for + * completely blocked/hung local backing devices which do no longer + * complete requests at all, not even do error completions. In this + * situation, usually a hard-reset and failover is the only way out. + * + * By "aborting", basically faking a local error-completion, + * we allow for a more graceful swichover by cleanly migrating services. + * Still the affected node has to be rebooted "soon". + * + * By completing these requests, we allow the upper layers to re-use + * the associated data pages. + * + * If later the local backing device "recovers", and now DMAs some data + * from disk into the original request pages, in the best case it will + * just put random data into unused pages; but typically it will corrupt + * meanwhile completely unrelated data, causing all sorts of damage. + * + * Which means delayed successful completion, + * especially for READ requests, + * is a reason to panic(). + * + * We assume that a delayed *error* completion is OK, + * though we still will complain noisily about it. 
+ */ + if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { + if (__ratelimit(&drbd_ratelimit_state)) + dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); + + if (!error) + panic("possible random memory corruption caused by delayed completion of aborted local request\n"); + } + /* to avoid recursion in __req_mod */ if (unlikely(error)) { what = (bio_data_dir(bio) == WRITE) From 0b143d4382b62db6738196caaefa793e5c0f6690 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Sep 2012 14:05:39 +0200 Subject: [PATCH 166/609] drbd: fix potential deadlock during bitmap (re-)allocation The former comment arguing that GFP_KERNEL was good enough was wrong: it did not take resize into account at all, and assumed the only path leading here was the normal attach on a still secondary device, so no deadlock would be possible. Both resize on a Primary, or attach on a diskless Primary, could potentially deadlock. drbd_bm_resize() is called while IO to the respective device is suspended, so we must use GFP_NOIO to avoid potential deadlock. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_bitmap.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index d8456649674..dda4e384929 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -373,14 +373,16 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) return old_pages; /* Trying kmalloc first, falling back to vmalloc. - * GFP_KERNEL is ok, as this is done when a lower level disk is - * "attached" to the drbd. Context is receiver thread or cqueue - * thread. As we have no disk yet, we are not in the IO path, - * not even the IO path of the peer. 
*/ + * GFP_NOIO, as this is called while drbd IO is "suspended", + * and during resize or attach on diskless Primary, + * we must not block on IO to ourselves. + * Context is receiver thread or cqueue thread/dmsetup. */ bytes = sizeof(struct page *)*want; - new_pages = kzalloc(bytes, GFP_KERNEL); + new_pages = kzalloc(bytes, GFP_NOIO); if (!new_pages) { - new_pages = vzalloc(bytes); + new_pages = __vmalloc(bytes, + GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); if (!new_pages) return NULL; vmalloced = 1; @@ -390,7 +392,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) for (i = 0; i < have; i++) new_pages[i] = old_pages[i]; for (; i < want; i++) { - page = alloc_page(GFP_HIGHUSER); + page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); if (!page) { bm_free_pages(new_pages + have, i - have); bm_vk_free(new_pages, vmalloced); From 8b45a5c8a190d0ea0077928fd8e95c0752e40e41 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Sep 2012 14:24:20 +0200 Subject: [PATCH 167/609] drbd: a few more GFP_KERNEL -> GFP_NOIO This has not yet been observed, but conceivably, when using GFP_KERNEL allocations from drbd_md_sync(), drbd_flush_after_epoch() or receive_SyncParam(), we could trigger additional IO to our own device, or another device in a criss-cross setup, and end up in a local deadlock, or potentially a distributed deadlock in a criss-cross setup involving the peer blocked in a similar way waiting for us to make progress.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3b378124bac..557dd5a2c0c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2497,7 +2497,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) if (test_bit(MD_NO_FUA, &mdev->flags)) return; - r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL); + r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_NOIO, NULL); if (r) { set_bit(MD_NO_FUA, &mdev->flags); dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 264ea25a9b0..c44eaa0ee0a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -957,7 +957,7 @@ static void drbd_flush(struct drbd_conf *mdev) int rv; if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { - rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, + rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_NOIO, NULL); if (rv) { dev_info(DEV, "local disk flush failed with status %d\n", rv); @@ -2907,7 +2907,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { - rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_NOIO); if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); goto disconnect; From 44edfb0d785ea06712b5a717fa2c1ae34e300845 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Sep 2012 13:03:45 +0200 Subject: [PATCH 168/609] drbd: wait for meta data IO completion even with failed disk, unless 
force-detached The intention of force-detach is to be able to deal with a completely unresponsive lower level IO stack, which does not even deliver error completions anymore, but no completion at all. In all other cases, we must still wait for the meta data IO completion. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 17 +++++++---------- drivers/block/drbd/drbd_bitmap.c | 8 ++++---- drivers/block/drbd/drbd_int.h | 4 ++-- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 3fbef018ce5..ec9b10cd65d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -82,22 +82,19 @@ void drbd_md_put_buffer(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); } -static bool md_io_allowed(struct drbd_conf *mdev) -{ - enum drbd_disk_state ds = mdev->state.disk; - return ds >= D_NEGOTIATING || ds == D_ATTACHING; -} - -void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, unsigned int *done) { long dt = bdev->dc.disk_timeout * HZ / 10; if (dt == 0) dt = MAX_SCHEDULE_TIMEOUT; - dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); - if (dt == 0) + dt = wait_event_timeout(mdev->misc_wait, + *done || test_bit(FORCE_DETACH, &mdev->flags), dt); + if (dt == 0) { dev_err(DEV, "meta-data IO operation timed out\n"); + drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH); + } } static int _drbd_md_sync_page_io(struct drbd_conf *mdev, @@ -137,7 +134,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); + wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done); ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0; out: 
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index dda4e384929..8d806975804 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1090,7 +1090,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * "in_flight reached zero, all done" event. */ if (!atomic_dec_and_test(&ctx->in_flight)) - wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); else kref_put(&ctx->kref, &bm_aio_ctx_destroy); @@ -1105,7 +1105,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w } if (atomic_read(&ctx->in_flight)) - err = -EIO; /* Disk failed during IO... */ + err = -EIO; /* Disk timeout/force-detach during IO... */ now = jiffies; if (rw == WRITE) { @@ -1224,11 +1224,11 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc } bm_page_io_async(ctx, idx, WRITE_SYNC); - wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); if (ctx->error) drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); - /* that should force detach, so the in memory bitmap will be + /* that causes us to detach, so the in memory bitmap will be * gone in a moment as well. 
*/ mdev->bm_writ_cnt++; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 557dd5a2c0c..619a4944fee 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1581,8 +1581,8 @@ extern void *drbd_md_get_buffer(struct drbd_conf *mdev); extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); -extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - unsigned int *done); +extern void wait_until_done_or_force_detached(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, unsigned int *done); extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); From 06f10adbdb027b225fd51584a218fa8344169514 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sat, 22 Sep 2012 20:27:19 +0200 Subject: [PATCH 169/609] drbd: prepare for more than 32 bit flags - struct drbd_conf { ... unsigned long flags; ... } + struct drbd_conf { ... unsigned long drbd_flags[N]; ... } And introduce wrapper functions for test/set/clear bit operations on this member. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 4 +- drivers/block/drbd/drbd_int.h | 60 ++++++++++++++++------ drivers/block/drbd/drbd_main.c | 82 +++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 56 ++++++++++---------- drivers/block/drbd/drbd_proc.c | 2 +- drivers/block/drbd/drbd_receiver.c | 76 +++++++++++++-------------- drivers/block/drbd/drbd_req.c | 20 ++++---- drivers/block/drbd/drbd_worker.c | 14 ++--- 8 files changed, 171 insertions(+), 143 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ec9b10cd65d..d4dd563d0d5 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -90,7 +90,7 @@ void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backi dt = MAX_SCHEDULE_TIMEOUT; dt = wait_event_timeout(mdev->misc_wait, - *done || test_bit(FORCE_DETACH, &mdev->flags), dt); + *done || drbd_test_flag(mdev, FORCE_DETACH), dt); if (dt == 0) { dev_err(DEV, "meta-data IO operation timed out\n"); drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH); @@ -108,7 +108,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, mdev->md_io.done = 0; mdev->md_io.error = -ENODEV; - if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) + if ((rw & WRITE) && !drbd_test_flag(mdev, MD_NO_FUA)) rw |= REQ_FUA | REQ_FLUSH; rw |= REQ_SYNC; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 619a4944fee..125fe1481ca 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -808,7 +808,7 @@ enum { #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) /* global flag bits */ -enum { +enum drbd_flag { CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ SEND_PING, /* whether asender should send a ping asap */ @@ -858,6 +858,9 @@ enum { * and potentially deadlock on, this drbd worker. 
*/ DISCONNECT_SENT, /* Currently the last bit in this 32bit word */ + + /* keep last */ + DRBD_N_FLAGS, }; struct drbd_bitmap; /* opaque for drbd_conf */ @@ -970,8 +973,7 @@ struct fifo_buffer { }; struct drbd_conf { - /* things that are stored as / read from meta data on disk */ - unsigned long flags; + unsigned long drbd_flags[(DRBD_N_FLAGS + BITS_PER_LONG -1)/BITS_PER_LONG]; /* configured by drbdsetup */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ @@ -1143,6 +1145,31 @@ struct drbd_conf { unsigned int local_max_bio_size; }; +static inline void drbd_set_flag(struct drbd_conf *mdev, enum drbd_flag f) +{ + set_bit(f, &mdev->drbd_flags[0]); +} + +static inline void drbd_clear_flag(struct drbd_conf *mdev, enum drbd_flag f) +{ + clear_bit(f, &mdev->drbd_flags[0]); +} + +static inline int drbd_test_flag(struct drbd_conf *mdev, enum drbd_flag f) +{ + return test_bit(f, &mdev->drbd_flags[0]); +} + +static inline int drbd_test_and_set_flag(struct drbd_conf *mdev, enum drbd_flag f) +{ + return test_and_set_bit(f, &mdev->drbd_flags[0]); +} + +static inline int drbd_test_and_clear_flag(struct drbd_conf *mdev, enum drbd_flag f) +{ + return test_and_clear_bit(f, &mdev->drbd_flags[0]); +} + static inline struct drbd_conf *minor_to_mdev(unsigned int minor) { struct drbd_conf *mdev; @@ -1812,12 +1839,12 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) static inline void drbd_state_lock(struct drbd_conf *mdev) { wait_event(mdev->misc_wait, - !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); + !drbd_test_and_set_flag(mdev, CLUSTER_ST_CHANGE)); } static inline void drbd_state_unlock(struct drbd_conf *mdev) { - clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); + drbd_clear_flag(mdev, CLUSTER_ST_CHANGE); wake_up(&mdev->misc_wait); } @@ -1874,9 +1901,9 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, /* NOTE fall through to detach case if forcedetach set */ case EP_DETACH: case EP_CALL_HELPER: - 
set_bit(WAS_IO_ERROR, &mdev->flags); + drbd_set_flag(mdev, WAS_IO_ERROR); if (forcedetach == DRBD_FORCE_DETACH) - set_bit(FORCE_DETACH, &mdev->flags); + drbd_set_flag(mdev, FORCE_DETACH); if (mdev->state.disk > D_FAILED) { _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); dev_err(DEV, @@ -2037,13 +2064,13 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) static inline void wake_asender(struct drbd_conf *mdev) { - if (test_bit(SIGNAL_ASENDER, &mdev->flags)) + if (drbd_test_flag(mdev, SIGNAL_ASENDER)) force_sig(DRBD_SIG, mdev->asender.task); } static inline void request_ping(struct drbd_conf *mdev) { - set_bit(SEND_PING, &mdev->flags); + drbd_set_flag(mdev, SEND_PING); wake_asender(mdev); } @@ -2374,7 +2401,7 @@ static inline bool may_inc_ap_bio(struct drbd_conf *mdev) if (is_susp(mdev->state)) return false; - if (test_bit(SUSPEND_IO, &mdev->flags)) + if (drbd_test_flag(mdev, SUSPEND_IO)) return false; /* to avoid potential deadlock or bitmap corruption, @@ -2389,7 +2416,7 @@ static inline bool may_inc_ap_bio(struct drbd_conf *mdev) * and we are within the spinlock anyways, we have this workaround. 
*/ if (atomic_read(&mdev->ap_bio_cnt) > mxb) return false; - if (test_bit(BITMAP_IO, &mdev->flags)) + if (drbd_test_flag(mdev, BITMAP_IO)) return false; return true; } @@ -2427,8 +2454,8 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) D_ASSERT(ap_bio >= 0); - if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { - if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + if (ap_bio == 0 && drbd_test_flag(mdev, BITMAP_IO)) { + if (!drbd_test_and_set_flag(mdev, BITMAP_IO_QUEUED)) drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); } @@ -2477,7 +2504,7 @@ static inline void drbd_update_congested(struct drbd_conf *mdev) { struct sock *sk = mdev->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->flags); + drbd_set_flag(mdev, NET_CONGESTED); } static inline int drbd_queue_order_type(struct drbd_conf *mdev) @@ -2494,14 +2521,15 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) { int r; - if (test_bit(MD_NO_FUA, &mdev->flags)) + if (drbd_test_flag(mdev, MD_NO_FUA)) return; r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_NOIO, NULL); if (r) { - set_bit(MD_NO_FUA, &mdev->flags); + drbd_set_flag(mdev, MD_NO_FUA); dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); } } + #endif diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7b48653d1c8..d8ba5c42670 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -322,7 +322,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, list_splice_init(&b->requests, &mdev->barrier_acked_requests); nob = b->next; - if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + if (drbd_test_and_clear_flag(mdev, CREATE_BARRIER)) { _tl_add_barrier(mdev, b); if (nob) mdev->oldest_tle = nob; @@ -381,7 +381,7 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) if (b->w.cb == NULL) { b->w.cb = w_send_barrier; inc_ap_pending(mdev); - 
set_bit(CREATE_BARRIER, &mdev->flags); + drbd_set_flag(mdev, CREATE_BARRIER); } drbd_queue_work(&mdev->data.work, &b->w); @@ -464,7 +464,7 @@ static void _tl_clear(struct drbd_conf *mdev) } /* ensure bit indicating barrier is required is clear */ - clear_bit(CREATE_BARRIER, &mdev->flags); + drbd_clear_flag(mdev, CREATE_BARRIER); memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); @@ -582,10 +582,10 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, unsigned long flags; enum drbd_state_rv rv; - if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) + if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_SUCCESS)) return SS_CW_SUCCESS; - if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) + if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_FAIL)) return SS_CW_FAILED_BY_PEER; rv = 0; @@ -660,7 +660,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, } if (mask.conn == C_MASK && val.conn == C_DISCONNECTING) - set_bit(DISCONNECT_SENT, &mdev->flags); + drbd_set_flag(mdev, DISCONNECT_SENT); wait_event(mdev->state_wait, (rv = _req_st_cond(mdev, mask, val))); @@ -850,7 +850,7 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, /* While establishing a connection only allow cstate to change. Delay/refuse role changes, detach attach etc... 
*/ - if (test_bit(STATE_SENT, &mdev->flags) && + if (drbd_test_flag(mdev, STATE_SENT) && !(os.conn == C_WF_REPORT_PARAMS || (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) rv = SS_IN_TRANSIENT_STATE; @@ -1109,7 +1109,7 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) static void drbd_resume_al(struct drbd_conf *mdev) { - if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) + if (drbd_test_and_clear_flag(mdev, AL_SUSPENDED)) dev_info(DEV, "Resumed AL updates\n"); } @@ -1215,8 +1215,8 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY && - !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) - set_bit(DEVICE_DYING, &mdev->flags); + !drbd_test_and_set_flag(mdev, CONFIG_PENDING)) + drbd_set_flag(mdev, DEVICE_DYING); /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference * on the ldev here, to be sure the transition -> D_DISKLESS resp. @@ -1291,7 +1291,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); - if (test_bit(CRASHED_PRIMARY, &mdev->flags)) + if (drbd_test_flag(mdev, CRASHED_PRIMARY)) mdf |= MDF_CRASHED_PRIMARY; if (mdev->state.role == R_PRIMARY || (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) @@ -1316,7 +1316,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && os.peer == R_SECONDARY && ns.peer == R_PRIMARY) - set_bit(CONSIDER_RESYNC, &mdev->flags); + drbd_set_flag(mdev, CONSIDER_RESYNC); /* Receiver should clean up itself */ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) @@ -1400,7 +1400,7 @@ int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, D_ASSERT(current == mdev->worker.task); /* open coded non-blocking drbd_suspend_io(mdev); */ - 
set_bit(SUSPEND_IO, &mdev->flags); + drbd_set_flag(mdev, SUSPEND_IO); drbd_bm_lock(mdev, why, flags); rv = io_fn(mdev); @@ -1426,7 +1426,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state nsm = (union drbd_state){ .i = -1 }; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { - clear_bit(CRASHED_PRIMARY, &mdev->flags); + drbd_clear_flag(mdev, CRASHED_PRIMARY); if (mdev->p_uuid) mdev->p_uuid[UI_FLAGS] &= ~((u64)2); } @@ -1466,9 +1466,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (ns.susp_fen) { /* case1: The outdate peer handler is successful: */ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - if (test_bit(NEW_CUR_UUID, &mdev->flags)) { + if (drbd_test_flag(mdev, NEW_CUR_UUID)) { drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); + drbd_clear_flag(mdev, NEW_CUR_UUID); } spin_lock_irq(&mdev->req_lock); _tl_clear(mdev); @@ -1477,7 +1477,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } /* case2: The connection was established again: */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { - clear_bit(NEW_CUR_UUID, &mdev->flags); + drbd_clear_flag(mdev, NEW_CUR_UUID); what = resend; nsm.susp_fen = 0; } @@ -1534,7 +1534,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { if (is_susp(mdev->state)) { - set_bit(NEW_CUR_UUID, &mdev->flags); + drbd_set_flag(mdev, NEW_CUR_UUID); } else { drbd_uuid_new_current(mdev); drbd_send_uuids(mdev); @@ -1625,7 +1625,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * we might come from an failed Attach before ldev was set. 
*/ if (mdev->ldev) { eh = mdev->ldev->dc.on_io_error; - was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR); if (was_io_error && eh == EP_CALL_HELPER) drbd_khelper(mdev, "local-io-error"); @@ -1643,7 +1643,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * So aborting local requests may cause crashes, * or even worse, silent data corruption. */ - if (test_and_clear_bit(FORCE_DETACH, &mdev->flags)) + if (drbd_test_and_clear_flag(mdev, FORCE_DETACH)) tl_abort_disk_io(mdev); /* current state still has to be D_FAILED, @@ -1692,7 +1692,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && - test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { + drbd_test_and_clear_flag(mdev, RESYNC_AFTER_NEG)) { if (ns.conn == C_CONNECTED) resync_after_online_grow(mdev); } @@ -1717,7 +1717,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { - clear_bit(STATE_SENT, &mdev->flags); + drbd_clear_flag(mdev, STATE_SENT); wake_up(&mdev->state_wait); } @@ -1750,7 +1750,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.aftr_isp != ns.aftr_isp) resume_next_sg(mdev); /* set in __drbd_set_state, unless CONFIG_PENDING was set */ - if (test_bit(DEVICE_DYING, &mdev->flags)) + if (drbd_test_flag(mdev, DEVICE_DYING)) drbd_thread_stop_nowait(&mdev->worker); } @@ -2145,7 +2145,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) mdev->comm_bm_set = drbd_bm_total_weight(mdev); p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); uuid_flags |= mdev->net_conf->want_lose ? 1 : 0; - uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 
2 : 0; + uuid_flags |= drbd_test_flag(mdev, CRASHED_PRIMARY) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); @@ -2775,7 +2775,7 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, offset += sent; } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/); set_fs(oldfs); - clear_bit(NET_CONGESTED, &mdev->flags); + drbd_clear_flag(mdev, NET_CONGESTED); ok = (len == 0); if (likely(ok)) @@ -2877,7 +2877,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dp_flags |= DP_MAY_SET_IN_SYNC; p.dp_flags = cpu_to_be32(dp_flags); - set_bit(UNPLUG_REMOTE, &mdev->flags); + drbd_set_flag(mdev, UNPLUG_REMOTE); ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { @@ -3056,7 +3056,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, } while (sent < size); if (sock == mdev->data.socket) - clear_bit(NET_CONGESTED, &mdev->flags); + drbd_clear_flag(mdev, NET_CONGESTED); if (rv <= 0) { if (rv != -EAGAIN) { @@ -3263,7 +3263,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) } drbd_free_resources(mdev); - clear_bit(AL_SUSPENDED, &mdev->flags); + drbd_clear_flag(mdev, AL_SUSPENDED); /* * currently we drbd_init_ee only on module load, so @@ -3556,7 +3556,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) goto out; } - if (test_bit(CALLBACK_PENDING, &mdev->flags)) { + if (drbd_test_flag(mdev, CALLBACK_PENDING)) { r |= (1 << BDI_async_congested); /* Without good local data, we would need to read from remote, * and that would need the worker thread as well, which is @@ -3580,7 +3580,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) reason = 'b'; } - if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) { + if (bdi_bits & (1 << BDI_async_congested) && drbd_test_flag(mdev, NET_CONGESTED)) { r |= (1 << BDI_async_congested); reason = reason == 'b' ? 
'a' : 'n'; } @@ -3867,7 +3867,7 @@ void drbd_md_sync(struct drbd_conf *mdev) del_timer(&mdev->md_sync_timer); /* timer may be rearmed by drbd_md_mark_dirty() now. */ - if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) + if (!drbd_test_and_clear_flag(mdev, MD_DIRTY)) return; /* We use here D_FAILED and not D_ATTACHING because we try to write @@ -4011,7 +4011,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) #ifdef DEBUG void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func) { - if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) { + if (!drbd_test_and_set_flag(mdev, MD_DIRTY)) { mod_timer(&mdev->md_sync_timer, jiffies + HZ); mdev->last_md_mark_dirty.line = line; mdev->last_md_mark_dirty.func = func; @@ -4020,7 +4020,7 @@ void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char * #else void drbd_md_mark_dirty(struct drbd_conf *mdev) { - if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) + if (!drbd_test_and_set_flag(mdev, MD_DIRTY)) mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); } #endif @@ -4182,14 +4182,14 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) put_ldev(mdev); } - clear_bit(BITMAP_IO, &mdev->flags); + drbd_clear_flag(mdev, BITMAP_IO); smp_mb__after_clear_bit(); wake_up(&mdev->misc_wait); if (work->done) work->done(mdev, rv); - clear_bit(BITMAP_IO_QUEUED, &mdev->flags); + drbd_clear_flag(mdev, BITMAP_IO_QUEUED); work->why = NULL; work->flags = 0; @@ -4210,7 +4210,7 @@ void drbd_ldev_destroy(struct drbd_conf *mdev) __free_page(mdev->md_io_tmpp); mdev->md_io_tmpp = NULL; } - clear_bit(GO_DISKLESS, &mdev->flags); + drbd_clear_flag(mdev, GO_DISKLESS); } static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused) @@ -4227,7 +4227,7 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused void drbd_go_diskless(struct drbd_conf *mdev) { D_ASSERT(mdev->state.disk == D_FAILED); - if 
(!test_and_set_bit(GO_DISKLESS, &mdev->flags)) + if (!drbd_test_and_set_flag(mdev, GO_DISKLESS)) drbd_queue_work(&mdev->data.work, &mdev->go_diskless); } @@ -4250,8 +4250,8 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, { D_ASSERT(current == mdev->worker.task); - D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags)); - D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); + D_ASSERT(!drbd_test_flag(mdev, BITMAP_IO_QUEUED)); + D_ASSERT(!drbd_test_flag(mdev, BITMAP_IO)); D_ASSERT(list_empty(&mdev->bm_io_work.w.list)); if (mdev->bm_io_work.why) dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n", @@ -4263,9 +4263,9 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, mdev->bm_io_work.flags = flags; spin_lock_irq(&mdev->req_lock); - set_bit(BITMAP_IO, &mdev->flags); + drbd_set_flag(mdev, BITMAP_IO); if (atomic_read(&mdev->ap_bio_cnt) == 0) { - if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + if (!drbd_test_and_set_flag(mdev, BITMAP_IO_QUEUED)) drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); } spin_unlock_irq(&mdev->req_lock); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e2d368f1747..42d172877ae 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -148,7 +148,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) int ret; if (current == mdev->worker.task) - set_bit(CALLBACK_PENDING, &mdev->flags); + drbd_set_flag(mdev, CALLBACK_PENDING); snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); @@ -193,7 +193,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) (ret >> 8) & 0xff, ret); if (current == mdev->worker.task) - clear_bit(CALLBACK_PENDING, &mdev->flags); + drbd_clear_flag(mdev, CALLBACK_PENDING); if (ret < 0) /* Ignore any ERRNOs we got. 
*/ ret = 0; @@ -295,7 +295,7 @@ static int _try_outdate_peer_async(void *data) */ spin_lock_irq(&mdev->req_lock); ns = mdev->state; - if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) { + if (ns.conn < C_WF_REPORT_PARAMS && !drbd_test_flag(mdev, STATE_SENT)) { ns.pdsk = nps; _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } @@ -583,7 +583,7 @@ char *ppsize(char *buf, unsigned long long size) */ void drbd_suspend_io(struct drbd_conf *mdev) { - set_bit(SUSPEND_IO, &mdev->flags); + drbd_set_flag(mdev, SUSPEND_IO); if (is_susp(mdev->state)) return; wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); @@ -591,7 +591,7 @@ void drbd_suspend_io(struct drbd_conf *mdev) void drbd_resume_io(struct drbd_conf *mdev) { - clear_bit(SUSPEND_IO, &mdev->flags); + drbd_clear_flag(mdev, SUSPEND_IO); wake_up(&mdev->misc_wait); } @@ -881,8 +881,8 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) */ static void drbd_reconfig_start(struct drbd_conf *mdev) { - wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); - wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); + wait_event(mdev->state_wait, !drbd_test_and_set_flag(mdev, CONFIG_PENDING)); + wait_event(mdev->state_wait, !drbd_test_flag(mdev, DEVICE_DYING)); drbd_thread_start(&mdev->worker); drbd_flush_workqueue(mdev); } @@ -896,10 +896,10 @@ static void drbd_reconfig_done(struct drbd_conf *mdev) if (mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { - set_bit(DEVICE_DYING, &mdev->flags); + drbd_set_flag(mdev, DEVICE_DYING); drbd_thread_stop_nowait(&mdev->worker); } else - clear_bit(CONFIG_PENDING, &mdev->flags); + drbd_clear_flag(mdev, CONFIG_PENDING); spin_unlock_irq(&mdev->req_lock); wake_up(&mdev->state_wait); } @@ -919,7 +919,7 @@ static void drbd_suspend_al(struct drbd_conf *mdev) spin_lock_irq(&mdev->req_lock); if (mdev->state.conn < C_CONNECTED) - s = !test_and_set_bit(AL_SUSPENDED, 
&mdev->flags); + s = !drbd_test_and_set_flag(mdev, AL_SUSPENDED); spin_unlock_irq(&mdev->req_lock); @@ -958,7 +958,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); /* make sure there is no leftover from previous force-detach attempts */ - clear_bit(FORCE_DETACH, &mdev->flags); + drbd_clear_flag(mdev, FORCE_DETACH); /* and no leftover from previously aborted resync or verify, either */ mdev->rs_total = 0; @@ -1168,9 +1168,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ if (nbc->dc.no_md_flush) - set_bit(MD_NO_FUA, &mdev->flags); + drbd_set_flag(mdev, MD_NO_FUA); else - clear_bit(MD_NO_FUA, &mdev->flags); + drbd_clear_flag(mdev, MD_NO_FUA); /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. @@ -1186,13 +1186,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_bump_write_ordering(mdev, WO_bdev_flush); if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) - set_bit(CRASHED_PRIMARY, &mdev->flags); + drbd_set_flag(mdev, CRASHED_PRIMARY); else - clear_bit(CRASHED_PRIMARY, &mdev->flags); + drbd_clear_flag(mdev, CRASHED_PRIMARY); if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) { - set_bit(CRASHED_PRIMARY, &mdev->flags); + drbd_set_flag(mdev, CRASHED_PRIMARY); cp_discovered = 1; } @@ -1217,18 +1217,18 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * so we can automatically recover from a crash of a * degraded but active "cluster" after a certain timeout. 
*/ - clear_bit(USE_DEGR_WFC_T, &mdev->flags); + drbd_clear_flag(mdev, USE_DEGR_WFC_T); if (mdev->state.role != R_PRIMARY && drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) - set_bit(USE_DEGR_WFC_T, &mdev->flags); + drbd_set_flag(mdev, USE_DEGR_WFC_T); dd = drbd_determine_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; } else if (dd == grew) - set_bit(RESYNC_AFTER_NEG, &mdev->flags); + drbd_set_flag(mdev, RESYNC_AFTER_NEG); if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { dev_info(DEV, "Assuming that all blocks are out of sync " @@ -1362,7 +1362,7 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } if (dt.detach_force) { - set_bit(FORCE_DETACH, &mdev->flags); + drbd_set_flag(mdev, FORCE_DETACH); drbd_force_state(mdev, NS(disk, D_FAILED)); reply->ret_code = SS_SUCCESS; goto out; @@ -1707,7 +1707,7 @@ void resync_after_online_grow(struct drbd_conf *mdev) if (mdev->state.role != mdev->state.peer) iass = (mdev->state.role == R_PRIMARY); else - iass = test_bit(DISCARD_CONCURRENT, &mdev->flags); + iass = drbd_test_flag(mdev, DISCARD_CONCURRENT); if (iass) drbd_start_resync(mdev, C_SYNC_SOURCE); @@ -1765,7 +1765,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, if (mdev->state.conn == C_CONNECTED) { if (dd == grew) - set_bit(RESIZE_PENDING, &mdev->flags); + drbd_set_flag(mdev, RESIZE_PENDING); drbd_send_uuids(mdev); drbd_send_sizes(mdev, 1, ddsf); @@ -1983,7 +1983,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl * resync just being finished, wait for it before requesting a new resync. * Also wait for it's after_state_ch(). 
*/ drbd_suspend_io(mdev); - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + wait_event(mdev->misc_wait, !drbd_test_flag(mdev, BITMAP_IO)); drbd_flush_workqueue(mdev); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); @@ -2026,7 +2026,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re * resync just being finished, wait for it before requesting a new resync. * Also wait for it's after_state_ch(). */ drbd_suspend_io(mdev); - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + wait_event(mdev->misc_wait, !drbd_test_flag(mdev, BITMAP_IO)); drbd_flush_workqueue(mdev); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); @@ -2094,9 +2094,9 @@ static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - if (test_bit(NEW_CUR_UUID, &mdev->flags)) { + if (drbd_test_flag(mdev, NEW_CUR_UUID)) { drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); + drbd_clear_flag(mdev, NEW_CUR_UUID); } drbd_suspend_io(mdev); reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); @@ -2199,7 +2199,7 @@ static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_r tl = reply->tag_list; rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : - test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT; + drbd_test_flag(mdev, USE_DEGR_WFC_T) ? UT_DEGRADED : UT_DEFAULT; tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); put_unaligned(TT_END, tl++); /* Close the tag list */ @@ -2224,7 +2224,7 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. 
*/ drbd_suspend_io(mdev); - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + wait_event(mdev->misc_wait, !drbd_test_flag(mdev, BITMAP_IO)); /* w_make_ov_request expects start position to be aligned */ mdev->ov_start_sector = args.start_sector & ~(BM_SECT_PER_BIT-1); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index a5a453b4355..662bc8ef830 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -270,7 +270,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) mdev->state.peer_isp ? 'p' : '-', mdev->state.user_isp ? 'u' : '-', mdev->congestion_reason ?: '-', - test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-', + drbd_test_flag(mdev, AL_SUSPENDED) ? 's' : '-', mdev->send_cnt/2, mdev->recv_cnt/2, mdev->writ_cnt/2, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c44eaa0ee0a..eb0cafea142 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -525,7 +525,7 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) else if (rv != -ERESTARTSYS) dev_err(DEV, "sock_recvmsg returned %d\n", rv); } else if (rv == 0) { - if (test_bit(DISCONNECT_SENT, &mdev->flags)) { + if (drbd_test_flag(mdev, DISCONNECT_SENT)) { long t; /* time_left */ t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED, mdev->net_conf->ping_timeo * HZ/10); @@ -749,7 +749,7 @@ static int drbd_connect(struct drbd_conf *mdev) D_ASSERT(!mdev->data.socket); - clear_bit(DISCONNECT_SENT, &mdev->flags); + drbd_clear_flag(mdev, DISCONNECT_SENT); if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; @@ -772,7 +772,7 @@ static int drbd_connect(struct drbd_conf *mdev) sock = s; s = NULL; } else if (!msock) { - clear_bit(DISCARD_CONCURRENT, &mdev->flags); + drbd_clear_flag(mdev, DISCARD_CONCURRENT); drbd_send_fp(mdev, s, P_HAND_SHAKE_M); msock = s; s = NULL; @@ -810,7 +810,7 @@ retry: 
sock_release(msock); } msock = s; - set_bit(DISCARD_CONCURRENT, &mdev->flags); + drbd_set_flag(mdev, DISCARD_CONCURRENT); break; default: dev_warn(DEV, "Error receiving initial packet\n"); @@ -892,18 +892,18 @@ retry: if (drbd_send_protocol(mdev) == -1) return -1; - set_bit(STATE_SENT, &mdev->flags); + drbd_set_flag(mdev, STATE_SENT); drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sizes(mdev, 0, 0); drbd_send_uuids(mdev); drbd_send_current_state(mdev); - clear_bit(USE_DEGR_WFC_T, &mdev->flags); - clear_bit(RESIZE_PENDING, &mdev->flags); + drbd_clear_flag(mdev, USE_DEGR_WFC_T); + drbd_clear_flag(mdev, RESIZE_PENDING); spin_lock_irq(&mdev->req_lock); rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL); if (mdev->state.conn != C_WF_REPORT_PARAMS) - clear_bit(STATE_SENT, &mdev->flags); + drbd_clear_flag(mdev, STATE_SENT); spin_unlock_irq(&mdev->req_lock); if (rv < SS_SUCCESS) @@ -1732,7 +1732,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ const int size = e->size; - const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); + const int discard = drbd_test_flag(mdev, DISCARD_CONCURRENT); DEFINE_WAIT(wait); struct drbd_request *i; struct hlist_node *n; @@ -2200,7 +2200,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) "Using discard-least-changes instead\n"); case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = drbd_test_flag(mdev, DISCARD_CONCURRENT) ? -1 : 1; break; } else { @@ -2216,7 +2216,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) rv = 1; else /* ( ch_self == ch_peer ) */ /* Well, then use something else. */ - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = drbd_test_flag(mdev, DISCARD_CONCURRENT) ? 
-1 : 1; break; case ASB_DISCARD_LOCAL: @@ -2420,7 +2420,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l } /* Common power [off|failure] */ - rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) + + rct = (drbd_test_flag(mdev, CRASHED_PRIMARY) ? 1 : 0) + (mdev->p_uuid[UI_FLAGS] & 2); /* lowest bit is set when we were primary, * next bit (weight 2) is set when peer was primary */ @@ -2431,7 +2431,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ - dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); + dc = drbd_test_flag(mdev, DISCARD_CONCURRENT); return dc ? -1 : 1; } } @@ -2648,7 +2648,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { + if (mdev->net_conf->dry_run || drbd_test_flag(mdev, CONN_DRY_RUN)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else @@ -2716,10 +2716,10 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig cf = be32_to_cpu(p->conn_flags); p_want_lose = cf & CF_WANT_LOSE; - clear_bit(CONN_DRY_RUN, &mdev->flags); + drbd_clear_flag(mdev, CONN_DRY_RUN); if (cf & CF_DRY_RUN) - set_bit(CONN_DRY_RUN, &mdev->flags); + drbd_set_flag(mdev, CONN_DRY_RUN); if (p_proto != mdev->net_conf->wire_protocol) { dev_err(DEV, "incompatible communication protocols\n"); @@ -3051,7 +3051,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned * needs to know my new size... 
*/ drbd_send_sizes(mdev, 0, ddsf); } - if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || + if (drbd_test_and_clear_flag(mdev, RESIZE_PENDING) || (dd == grew && mdev->state.conn == C_CONNECTED)) { if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.disk >= D_INCONSISTENT) { @@ -3060,7 +3060,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned else resync_after_online_grow(mdev); } else - set_bit(RESYNC_AFTER_NEG, &mdev->flags); + drbd_set_flag(mdev, RESYNC_AFTER_NEG); } } @@ -3121,7 +3121,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the new disk state... */ - wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); + wait_event(mdev->misc_wait, !drbd_test_flag(mdev, CLUSTER_ST_CHANGE)); if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); @@ -3170,8 +3170,8 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && - test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + if (drbd_test_flag(mdev, DISCARD_CONCURRENT) && + drbd_test_flag(mdev, CLUSTER_ST_CHANGE)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return true; } @@ -3280,7 +3280,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned os.disk == D_NEGOTIATING)); /* if we have both been inconsistent, and the peer has been * forced to be UpToDate with --overwrite-data */ - cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); + cr |= drbd_test_flag(mdev, CONSIDER_RESYNC); /* if we had been plain connected, and the admin requested to * start a sync by "invalidate" or "invalidate-remote" */ cr |= (os.conn == C_CONNECTED && @@ -3300,7 +3300,7 @@ static int 
receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned peer_state.disk = D_DISKLESS; real_peer_disk = D_DISKLESS; } else { - if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) + if (drbd_test_and_clear_flag(mdev, CONN_DRY_RUN)) return false; D_ASSERT(os.conn == C_WF_REPORT_PARAMS); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); @@ -3312,7 +3312,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); if (mdev->state.i != os.i) goto retry; - clear_bit(CONSIDER_RESYNC, &mdev->flags); + drbd_clear_flag(mdev, CONSIDER_RESYNC); ns.peer = peer_state.role; ns.pdsk = real_peer_disk; ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); @@ -3320,14 +3320,14 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ns.disk = mdev->new_state_tmp.disk; cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && - test_bit(NEW_CUR_UUID, &mdev->flags)) { + drbd_test_flag(mdev, NEW_CUR_UUID)) { /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this for temporal network outages! */ spin_unlock_irq(&mdev->req_lock); dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); tl_clear(mdev); drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); + drbd_clear_flag(mdev, NEW_CUR_UUID); drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); return false; } @@ -3931,7 +3931,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* serialize with bitmap writeout triggered by the state change, * if any. */ - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + wait_event(mdev->misc_wait, !drbd_test_flag(mdev, BITMAP_IO)); /* tcp_close and release of sendpage pages can be deferred. 
I don't * want to use SO_LINGER, because apparently it can be deferred for @@ -4267,9 +4267,9 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) int retcode = be32_to_cpu(p->retcode); if (retcode >= SS_SUCCESS) { - set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); + drbd_set_flag(mdev, CL_ST_CHG_SUCCESS); } else { - set_bit(CL_ST_CHG_FAIL, &mdev->flags); + drbd_set_flag(mdev, CL_ST_CHG_FAIL); dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", drbd_set_st_err_str(retcode), retcode); } @@ -4288,7 +4288,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; - if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) + if (!drbd_test_and_set_flag(mdev, GOT_PING_ACK)) wake_up(&mdev->misc_wait); return true; @@ -4504,7 +4504,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) if (mdev->state.conn == C_AHEAD && atomic_read(&mdev->ap_in_flight) == 0 && - !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { + !drbd_test_and_set_flag(mdev, AHEAD_TO_SYNC_SOURCE)) { mdev->start_resync_timer.expires = jiffies + HZ; add_timer(&mdev->start_resync_timer); } @@ -4614,7 +4614,7 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == Running) { drbd_thread_current_set_cpu(mdev); - if (test_and_clear_bit(SEND_PING, &mdev->flags)) { + if (drbd_test_and_clear_flag(mdev, SEND_PING)) { ERR_IF(!drbd_send_ping(mdev)) goto reconnect; mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*HZ/10; @@ -4627,12 +4627,12 @@ int drbd_asender(struct drbd_thread *thi) 3 < atomic_read(&mdev->unacked_cnt)) drbd_tcp_cork(mdev->meta.socket); while (1) { - clear_bit(SIGNAL_ASENDER, &mdev->flags); + drbd_clear_flag(mdev, SIGNAL_ASENDER); flush_signals(current); if (!drbd_process_done_ee(mdev)) goto reconnect; /* to avoid race with newly queued ACKs */ - set_bit(SIGNAL_ASENDER, &mdev->flags); + drbd_set_flag(mdev, 
SIGNAL_ASENDER); spin_lock_irq(&mdev->req_lock); empty = list_empty(&mdev->done_ee); spin_unlock_irq(&mdev->req_lock); @@ -4652,7 +4652,7 @@ int drbd_asender(struct drbd_thread *thi) rv = drbd_recv_short(mdev, mdev->meta.socket, buf, expect-received, 0); - clear_bit(SIGNAL_ASENDER, &mdev->flags); + drbd_clear_flag(mdev, SIGNAL_ASENDER); flush_signals(current); @@ -4670,7 +4670,7 @@ int drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { - if (test_bit(DISCONNECT_SENT, &mdev->flags)) { + if (drbd_test_flag(mdev, DISCONNECT_SENT)) { long t; /* time_left */ t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED, mdev->net_conf->ping_timeo * HZ/10); @@ -4689,7 +4689,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "PingAck did not arrive in time.\n"); goto reconnect; } - set_bit(SEND_PING, &mdev->flags); + drbd_set_flag(mdev, SEND_PING); continue; } else if (rv == -EINTR) { continue; @@ -4747,7 +4747,7 @@ disconnect: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); drbd_md_sync(mdev); } - clear_bit(SIGNAL_ASENDER, &mdev->flags); + drbd_clear_flag(mdev, SIGNAL_ASENDER); D_ASSERT(mdev->state.conn < C_CONNECTED); dev_info(DEV, "asender terminated\n"); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 01b2ac641c7..9220d9f9d6c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -118,7 +118,7 @@ static void queue_barrier(struct drbd_conf *mdev) * barrier/epoch object is added. This is the only place this bit is * set. It indicates that the barrier for this epoch is already queued, * and no new epoch has been created yet. */ - if (test_bit(CREATE_BARRIER, &mdev->flags)) + if (drbd_test_flag(mdev, CREATE_BARRIER)) return; b = mdev->newest_tle; @@ -129,7 +129,7 @@ static void queue_barrier(struct drbd_conf *mdev) * or (on connection loss) in tl_clear. 
*/ inc_ap_pending(mdev); drbd_queue_work(&mdev->data.work, &b->w); - set_bit(CREATE_BARRIER, &mdev->flags); + drbd_set_flag(mdev, CREATE_BARRIER); } static void _about_to_complete_local_write(struct drbd_conf *mdev, @@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * corresponding hlist_del is in _req_may_be_done() */ hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); - set_bit(UNPLUG_REMOTE, &mdev->flags); + drbd_set_flag(mdev, UNPLUG_REMOTE); D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_NET_QUEUED; @@ -541,11 +541,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* otherwise we may lose an unplug, which may cause some remote * io-scheduler timeout to expire, increasing maximum latency, * hurting performance. */ - set_bit(UNPLUG_REMOTE, &mdev->flags); + drbd_set_flag(mdev, UNPLUG_REMOTE); /* see drbd_make_request_common, * just after it grabs the req_lock */ - D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); + D_ASSERT(drbd_test_flag(mdev, CREATE_BARRIER) == 0); req->epoch = mdev->newest_tle->br_number; @@ -888,7 +888,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns * Empty flushes don't need to go into the activity log, they can only * flush data for pending writes which are already in there. */ if (rw == WRITE && local && size - && !test_bit(AL_SUSPENDED, &mdev->flags)) { + && !drbd_test_flag(mdev, AL_SUSPENDED)) { req->rq_state |= RQ_IN_ACT_LOG; drbd_al_begin_io(mdev, sector); } @@ -912,7 +912,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns * if we lost that race, we retry. 
*/ if (rw == WRITE && (remote || send_oos) && mdev->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->flags)) { + drbd_test_flag(mdev, CREATE_BARRIER)) { allocate_barrier: b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); if (!b) { @@ -955,7 +955,7 @@ allocate_barrier: } if (rw == WRITE && (remote || send_oos) && mdev->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->flags)) { + drbd_test_flag(mdev, CREATE_BARRIER)) { /* someone closed the current epoch * while we were grabbing the spinlock */ spin_unlock_irq(&mdev->req_lock); @@ -977,12 +977,12 @@ allocate_barrier: * make sure that, if this is a write request and it triggered a * barrier packet, this request is queued within the same spinlock. */ if ((remote || send_oos) && mdev->unused_spare_tle && - test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + drbd_test_and_clear_flag(mdev, CREATE_BARRIER)) { _tl_add_barrier(mdev, mdev->unused_spare_tle); mdev->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && - test_bit(CREATE_BARRIER, &mdev->flags))); + drbd_test_flag(mdev, CREATE_BARRIER))); } /* NOTE diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 66dcb2d7ead..acb614ac9fe 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -793,7 +793,7 @@ int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } drbd_start_resync(mdev, C_SYNC_SOURCE); - clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags); + drbd_clear_flag(mdev, AHEAD_TO_SYNC_SOURCE); return 1; } @@ -817,10 +817,10 @@ static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int ca static void ping_peer(struct drbd_conf *mdev) { - clear_bit(GOT_PING_ACK, &mdev->flags); + drbd_clear_flag(mdev, GOT_PING_ACK); request_ping(mdev); wait_event(mdev->misc_wait, - test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); + drbd_test_flag(mdev, GOT_PING_ACK) || mdev->state.conn < C_CONNECTED); } int 
drbd_resync_finished(struct drbd_conf *mdev) @@ -1749,8 +1749,8 @@ int drbd_worker(struct drbd_thread *thi) NS(conn, C_NETWORK_FAILURE)); } } - D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); - D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); + D_ASSERT(drbd_test_flag(mdev, DEVICE_DYING)); + D_ASSERT(drbd_test_flag(mdev, CONFIG_PENDING)); spin_lock_irq(&mdev->data.work.q_lock); i = 0; @@ -1783,8 +1783,8 @@ int drbd_worker(struct drbd_thread *thi) dev_info(DEV, "worker terminated\n"); - clear_bit(DEVICE_DYING, &mdev->flags); - clear_bit(CONFIG_PENDING, &mdev->flags); + drbd_clear_flag(mdev, DEVICE_DYING); + drbd_clear_flag(mdev, CONFIG_PENDING); wake_up(&mdev->state_wait); return 0; From a2a3c74f243d5d1793f89ccdceaa6918851f7fce Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sat, 22 Sep 2012 12:26:57 +0200 Subject: [PATCH 170/609] drbd: always write bitmap on detach If we detach due to local read-error (which sets a bit in the bitmap), stay Primary, and then re-attach (which re-reads the bitmap from disk), we potentially lost the "out-of-sync" (or, "bad block") information in the bitmap. Always (try to) write out the changed bitmap pages before going diskless. That way, we don't lose the bit for the bad block, the next resync will fetch it from the peer, and rewrite it locally, which may result in block reallocation in some lower layer (or the hardware), and thereby "heal" the bad blocks. If the bitmap writeout errors out as well, we will (again: try to) mark the "we need a full sync" bit in our super block, if it was a READ error; writes are covered by the activity log already. If that superblock does not make it to disk either, we are sorry. Maybe we just lost an entire disk or controller (or iSCSI connection), and there actually are no bad blocks at all, so we don't need to re-fetch from the peer, there is no "auto-healing" necessary. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 36 ++++++++++++++++++++++++----- drivers/block/drbd/drbd_main.c | 39 +++++++++++++++++++++++++------- drivers/block/drbd/drbd_nl.c | 2 ++ drivers/block/drbd/drbd_req.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 4 ++-- 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 125fe1481ca..277c69c9465 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -831,7 +831,8 @@ enum drbd_flag { once no more io in flight, start bitmap io */ BITMAP_IO_QUEUED, /* Started bitmap IO */ GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ - WAS_IO_ERROR, /* Local disk failed returned IO error */ + WAS_IO_ERROR, /* Local disk failed, returned IO error */ + WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. 
*/ NET_CONGESTED, /* The data socket is congested */ @@ -1879,30 +1880,53 @@ static inline int drbd_request_state(struct drbd_conf *mdev, } enum drbd_force_detach_flags { - DRBD_IO_ERROR, + DRBD_READ_ERROR, + DRBD_WRITE_ERROR, DRBD_META_IO_ERROR, DRBD_FORCE_DETACH, }; #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, - enum drbd_force_detach_flags forcedetach, + enum drbd_force_detach_flags df, const char *where) { switch (mdev->ldev->dc.on_io_error) { case EP_PASS_ON: - if (forcedetach == DRBD_IO_ERROR) { + if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); if (mdev->state.disk > D_INCONSISTENT) _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); break; } - /* NOTE fall through to detach case if forcedetach set */ + /* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */ case EP_DETACH: case EP_CALL_HELPER: + /* Remember whether we saw a READ or WRITE error. + * + * Recovery of the affected area for WRITE failure is covered + * by the activity log. + * READ errors may fall outside that area though. Certain READ + * errors can be "healed" by writing good data to the affected + * blocks, which triggers block re-allocation in lower layers. + * + * If we can not write the bitmap after a READ error, + * we may need to trigger a full sync (see w_go_diskless()). + * + * Force-detach is not really an IO error, but rather a + * desperate measure to try to deal with a completely + * unresponsive lower level IO stack. + * Still it should be treated as a WRITE error. + * + * Meta IO error is always WRITE error: + * we read meta data only once during attach, + * which will fail in case of errors. 
+ */ drbd_set_flag(mdev, WAS_IO_ERROR); - if (forcedetach == DRBD_FORCE_DETACH) + if (df == DRBD_READ_ERROR) + drbd_set_flag(mdev, WAS_READ_ERROR); + if (df == DRBD_FORCE_DETACH) drbd_set_flag(mdev, FORCE_DETACH); if (mdev->state.disk > D_FAILED) { _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d8ba5c42670..9b833e0fb44 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* first half of local IO error, failure to attach, * or administrative detach */ if (os.disk != D_FAILED && ns.disk == D_FAILED) { - enum drbd_io_error_p eh = EP_PASS_ON; - int was_io_error = 0; /* corresponding get_ldev was in __drbd_set_state, to serialize * our cleanup here with the transition to D_DISKLESS. - * But is is still not save to dreference ldev here, since - * we might come from an failed Attach before ldev was set. */ + * But it is still not safe to dereference ldev here, we may end + * up here from a failed attach, before ldev was even set. */ if (mdev->ldev) { - eh = mdev->ldev->dc.on_io_error; - was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR); + enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error; - if (was_io_error && eh == EP_CALL_HELPER) + /* In some setups, this handler triggers a suicide, + * basically mapping IO error to node failure, to + * reduce the number of different failure scenarios. + * + * This handler intentionally runs before we abort IO, + * notify the peer, or try to update our meta data. */ + if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR)) drbd_khelper(mdev, "local-io-error"); /* Immediately allow completion of all application IO, @@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * So aborting local requests may cause crashes, * or even worse, silent data corruption.
*/ - if (drbd_test_and_clear_flag(mdev, FORCE_DETACH)) + if (drbd_test_flag(mdev, FORCE_DETACH)) tl_abort_disk_io(mdev); /* current state still has to be D_FAILED, @@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused * inc/dec it frequently. Once we are D_DISKLESS, no one will touch * the protected members anymore, though, so once put_ldev reaches zero * again, it will be safe to free them. */ + + /* Try to write changed bitmap pages, read errors may have just + * set some bits outside the area covered by the activity log. + * + * If we have an IO error during the bitmap writeout, + * we will want a full sync next time, just in case. + * (Do we want a specific meta data flag for this?) + * + * If that does not make it to stable storage either, + * we cannot do anything about that anymore. */ + if (mdev->bitmap) { + if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write, + "detach", BM_LOCKED_MASK)) { + if (drbd_test_flag(mdev, WAS_READ_ERROR)) { + drbd_md_set_flag(mdev, MDF_FULL_SYNC); + drbd_md_sync(mdev); + } + } + } + drbd_force_state(mdev, NS(disk, D_DISKLESS)); return 1; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 42d172877ae..c8dda4e8dfc 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -959,6 +959,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* make sure there is no leftover from previous force-detach attempts */ drbd_clear_flag(mdev, FORCE_DETACH); + drbd_clear_flag(mdev, WAS_IO_ERROR); + drbd_clear_flag(mdev, WAS_READ_ERROR); /* and no leftover from previously aborted resync or verify, either */ mdev->rs_total = 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 9220d9f9d6c..d9e5962a9a8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= 
RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); _req_may_be_done_not_susp(req, m); break; @@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; } - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_READ_ERROR); goto_queue_for_net_read: diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index acb614ac9fe..7cd32e73b01 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &e->flags)) - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_READ_ERROR); spin_unlock_irqrestore(&mdev->req_lock, flags); drbd_queue_work(&mdev->data.work, &e->w); @@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo : list_empty(&mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &e->flags)) - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); spin_unlock_irqrestore(&mdev->req_lock, flags); if (is_syncer_req) From ccae7868b0c5697508a541c531cf96b361d62c1c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 26 Sep 2012 14:07:04 +0200 Subject: [PATCH 171/609] drbd: log request sector offset and size for IO errors Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 19 ++++++++++++++++++- include/linux/drbd.h | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d9e5962a9a8..135ea76ed50 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -387,6 +387,20 @@ out_conflict: return 1; } +static void 
drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *req) +{ + char b[BDEVNAME_SIZE]; + + if (__ratelimit(&drbd_ratelimit_state)) + return; + + dev_warn(DEV, "local %s IO error sector %llu+%u on %s\n", + (req->rq_state & RQ_WRITE) ? "WRITE" : "READ", + (unsigned long long)req->sector, + req->size >> 9, + bdevname(mdev->ldev->backing_bdev, b)); +} + /* obviously this could be coded as many single functions * instead of one huge switch, * or by putting the code directly in the respective locations @@ -455,6 +469,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; + drbd_report_io_error(mdev, req); __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); _req_may_be_done_not_susp(req, m); break; @@ -477,6 +492,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; } + drbd_report_io_error(mdev, req); __drbd_chk_io_error(mdev, DRBD_READ_ERROR); goto_queue_for_net_read: @@ -900,7 +916,8 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns if (!(local || remote) && !is_susp(mdev->state)) { if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + dev_err(DEV, "IO ERROR: neither local nor remote data, sector %llu+%u\n", + (unsigned long long)req->sector, req->size >> 9); goto fail_free_complete; } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 4a7eccbd129..94f58a102bb 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.13" +#define REL_VERSION "8.3.14" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 97 From 0a8704a51f386cab7394e38ff1d66eef924d8ab8 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 24 Oct 2012 18:58:45 +0200 Subject: [PATCH 172/609] xen/blkback: Persistent grant maps for xen blk drivers This patch implements persistent grants 
for the xen-blk{front,back} mechanism. The effect of this change is to reduce the number of unmap operations performed, since they cause a (costly) TLB shootdown. This allows the I/O performance to scale better when a large number of VMs are performing I/O. Previously, the blkfront driver was supplied a bvec[] from the request queue. This was granted to dom0; dom0 performed the I/O and wrote directly into the grant-mapped memory and unmapped it; blkfront then removed foreign access for that grant. The cost of unmapping scales badly with the number of CPUs in Dom0. An experiment showed that when Dom0 has 24 VCPUs, and guests are performing parallel I/O to a ramdisk, the IPIs from performing unmaps are a bottleneck at 5 guests (at which point 650,000 IOPS are being performed in total). If more than 5 guests are used, the performance declines. By 10 guests, only 400,000 IOPS are being performed. This patch improves performance by only unmapping when the connection between blkfront and blkback is broken. On startup blkfront notifies blkback that it is using persistent grants, and blkback will do the same. If blkback is not capable of persistent mapping, blkfront will still use the same grants, since it is compatible with the previous protocol, and simplifies the code complexity in blkfront. To perform a read, in persistent mode, blkfront uses a separate pool of pages that it maps to dom0. When a request comes in, blkfront transmutes the request so that blkback will write into one of these free pages. Blkback keeps note of which grefs it has already mapped. When a new ring request comes to blkback, it looks to see if it has already mapped that page. If so, it will not map it again. If the page hasn't been previously mapped, it is mapped now, and a record is kept of this mapping. Blkback proceeds as usual. When blkfront is notified that blkback has completed a request, it memcpy's from the shared memory, into the bvec supplied.
A record that the {gref, page} tuple is mapped, and not in flight, is kept. Writes are similar, except that the memcpy is performed from the supplied bvecs, into the shared pages, before the request is put onto the ring. Blkback stores a mapping of grefs=>{page mapped to by gref} in a red-black tree. As the grefs are not known a priori, and provide no guarantees on their ordering, we have to perform a search through this tree to find the page, for every gref we receive. This operation takes O(log n) time in the worst case. In blkfront grants are stored using a singly linked list. The maximum number of grants that blkback will persistently map is currently set to RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST, to prevent a malicious guest from attempting a DoS, by supplying fresh grefs, causing the Dom0 kernel to map excessively. If a guest is using persistent grants and exceeds the maximum number of grants to map persistently, the newly passed grefs will be mapped and unmapped. Using this approach, we can have requests that mix persistent and non-persistent grants, and we need to handle them correctly. This allows us to set the maximum number of persistent grants to a lower value than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST, although setting it will lead to unpredictable performance. In writing this patch, the question arises as to whether the additional cost of performing memcpys in the guest (to/from the pool of granted pages) outweighs the gains of not performing TLB shootdowns. The answer to that question is `no'. There appears to be very little, if any, additional cost to the guest of using persistent grants. There is perhaps a small saving, from the reduced number of hypercalls performed in granting, and ending foreign access.
Signed-off-by: Oliver Chick Signed-off-by: Roger Pau Monne Signed-off-by: Konrad Rzeszutek Wilk [v1: Fixed up the misuse of bool as int] --- drivers/block/xen-blkback/blkback.c | 286 +++++++++++++++++++++++++--- drivers/block/xen-blkback/common.h | 17 ++ drivers/block/xen-blkback/xenbus.c | 23 ++- drivers/block/xen-blkfront.c | 197 ++++++++++++++++--- 4 files changed, 471 insertions(+), 52 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 280a13846e6..d7dd5cbdac5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,7 @@ struct pending_req { unsigned short operation; int status; struct list_head free_list; + DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); }; #define BLKBACK_INVALID_HANDLE (~0) @@ -98,6 +100,36 @@ struct xen_blkbk { static struct xen_blkbk *blkbk; +/* + * Maximum number of grant pages that can be mapped in blkback. + * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of + * pages that blkback will persistently map. + * Currently, this is: + * RING_SIZE = 32 (for all known ring types) + * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 + * sizeof(struct persistent_gnt) = 48 + * So the maximum memory used to store the grants is: + * 32 * 11 * 48 = 16896 bytes + */ +static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol) +{ + switch (protocol) { + case BLKIF_PROTOCOL_NATIVE: + return __CONST_RING_SIZE(blkif, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + case BLKIF_PROTOCOL_X86_32: + return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + case BLKIF_PROTOCOL_X86_64: + return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + default: + BUG(); + } + return 0; +} + + /* * Little helpful macro to figure out the index and virtual address of the * pending_pages[..]. 
For each 'pending_req' we have have up to @@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st); +#define foreach_grant(pos, rbtree, node) \ + for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ + &(pos)->node != NULL; \ + (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) + + +static void add_persistent_gnt(struct rb_root *root, + struct persistent_gnt *persistent_gnt) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct persistent_gnt *this; + + /* Figure out where to put new node */ + while (*new) { + this = container_of(*new, struct persistent_gnt, node); + + parent = *new; + if (persistent_gnt->gnt < this->gnt) + new = &((*new)->rb_left); + else if (persistent_gnt->gnt > this->gnt) + new = &((*new)->rb_right); + else { + pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n"); + BUG(); + } + } + + /* Add new node and rebalance tree. */ + rb_link_node(&(persistent_gnt->node), parent, new); + rb_insert_color(&(persistent_gnt->node), root); +} + +static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, + grant_ref_t gref) +{ + struct persistent_gnt *data; + struct rb_node *node = root->rb_node; + + while (node) { + data = container_of(node, struct persistent_gnt, node); + + if (gref < data->gnt) + node = node->rb_left; + else if (gref > data->gnt) + node = node->rb_right; + else + return data; + } + return NULL; +} + /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. 
*/ @@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; struct xen_vbd *vbd = &blkif->vbd; + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt; + int ret = 0; + int segs_to_unmap = 0; xen_blkif_get(blkif); @@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg) print_stats(blkif); } + /* Free all persistent grant pages */ + if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) { + foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) { + BUG_ON(persistent_gnt->handle == + BLKBACK_INVALID_HANDLE); + gnttab_set_unmap_op(&unmap[segs_to_unmap], + (unsigned long) pfn_to_kaddr(page_to_pfn( + persistent_gnt->page)), + GNTMAP_host_map, + persistent_gnt->handle); + + pages[segs_to_unmap] = persistent_gnt->page; + rb_erase(&persistent_gnt->node, + &blkif->persistent_gnts); + kfree(persistent_gnt); + blkif->persistent_gnt_c--; + + if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || + !rb_next(&persistent_gnt->node)) { + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); + BUG_ON(ret); + segs_to_unmap = 0; + } + } + } + + BUG_ON(blkif->persistent_gnt_c != 0); + BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); + if (log_stats) print_stats(blkif); @@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req) int ret; for (i = 0; i < req->nr_pages; i++) { + if (!test_bit(i, req->unmap_seg)) + continue; handle = pending_handle(req, i); if (handle == BLKBACK_INVALID_HANDLE) continue; @@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req) static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, - struct seg_buf seg[]) + struct seg_buf seg[], + struct page *pages[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int i; + struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page 
*pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt = NULL; + struct xen_blkif *blkif = pending_req->blkif; + phys_addr_t addr = 0; + int i, j; + bool new_map; int nseg = req->u.rw.nr_segments; + int segs_to_map = 0; int ret = 0; + int use_persistent_gnts; + + use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); + + BUG_ON(blkif->persistent_gnt_c > + max_mapped_grant_pages(pending_req->blkif->blk_protocol)); /* * Fill out preq.nr_sects with proper amount of sectors, and setup @@ -359,36 +493,143 @@ static int xen_blkbk_map(struct blkif_request *req, for (i = 0; i < nseg; i++) { uint32_t flags; - flags = GNTMAP_host_map; - if (pending_req->operation != BLKIF_OP_READ) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, - pending_req->blkif->domid); + if (use_persistent_gnts) + persistent_gnt = get_persistent_gnt( + &blkif->persistent_gnts, + req->u.rw.seg[i].gref); + + if (persistent_gnt) { + /* + * We are using persistent grants and + * the grant is already mapped + */ + new_map = false; + } else if (use_persistent_gnts && + blkif->persistent_gnt_c < + max_mapped_grant_pages(blkif->blk_protocol)) { + /* + * We are using persistent grants, the grant is + * not mapped but we have room for it + */ + new_map = true; + persistent_gnt = kzalloc( + sizeof(struct persistent_gnt), + GFP_KERNEL); + if (!persistent_gnt) + return -ENOMEM; + persistent_gnt->page = alloc_page(GFP_KERNEL); + if (!persistent_gnt->page) { + kfree(persistent_gnt); + return -ENOMEM; + } + persistent_gnt->gnt = req->u.rw.seg[i].gref; + + pages_to_gnt[segs_to_map] = + persistent_gnt->page; + addr = (unsigned long) pfn_to_kaddr( + page_to_pfn(persistent_gnt->page)); + + add_persistent_gnt(&blkif->persistent_gnts, + persistent_gnt); + blkif->persistent_gnt_c++; + pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", + persistent_gnt->gnt, blkif->persistent_gnt_c, + 
max_mapped_grant_pages(blkif->blk_protocol)); + } else { + /* + * We are either using persistent grants and + * hit the maximum limit of grants mapped, + * or we are not using persistent grants. + */ + if (use_persistent_gnts && + !blkif->vbd.overflow_max_grants) { + blkif->vbd.overflow_max_grants = 1; + pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", + blkif->domid, blkif->vbd.handle); + } + new_map = true; + pages[i] = blkbk->pending_page(pending_req, i); + addr = vaddr(pending_req, i); + pages_to_gnt[segs_to_map] = + blkbk->pending_page(pending_req, i); + } + + if (persistent_gnt) { + pages[i] = persistent_gnt->page; + persistent_gnts[i] = persistent_gnt; + } else { + persistent_gnts[i] = NULL; + } + + if (new_map) { + flags = GNTMAP_host_map; + if (!persistent_gnt && + (pending_req->operation != BLKIF_OP_READ)) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[segs_to_map++], addr, + flags, req->u.rw.seg[i].gref, + blkif->domid); + } } - ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg); - BUG_ON(ret); + if (segs_to_map) { + ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); + BUG_ON(ret); + } /* * Now swizzle the MFN in our domain with the MFN from the other domain * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. 
*/ - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); - map[i].handle = BLKBACK_INVALID_HANDLE; - ret |= 1; + bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + for (i = 0, j = 0; i < nseg; i++) { + if (!persistent_gnts[i] || !persistent_gnts[i]->handle) { + /* This is a newly mapped grant */ + BUG_ON(j >= segs_to_map); + if (unlikely(map[j].status != 0)) { + pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); + map[j].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + if (persistent_gnts[i]) { + rb_erase(&persistent_gnts[i]->node, + &blkif->persistent_gnts); + blkif->persistent_gnt_c--; + kfree(persistent_gnts[i]); + persistent_gnts[i] = NULL; + } + } } + if (persistent_gnts[i]) { + if (!persistent_gnts[i]->handle) { + /* + * If this is a new persistent grant + * save the handler + */ + persistent_gnts[i]->handle = map[j].handle; + persistent_gnts[i]->dev_bus_addr = + map[j++].dev_bus_addr; + } + pending_handle(pending_req, i) = + persistent_gnts[i]->handle; - pending_handle(pending_req, i) = map[i].handle; + if (ret) + continue; - if (ret) - continue; + seg[i].buf = persistent_gnts[i]->dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } else { + pending_handle(pending_req, i) = map[j].handle; + bitmap_set(pending_req->unmap_seg, i, 1); - seg[i].buf = map[i].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); + if (ret) { + j++; + continue; + } + + seg[i].buf = map[j++].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } } return ret; } @@ -591,6 +832,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int operation; struct blk_plug plug; bool drain = false; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; switch (req->operation) { case BLKIF_OP_READ: @@ -677,7 +919,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. 
*/ - if (xen_blkbk_map(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg, pages)) goto fail_flush; /* @@ -689,7 +931,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, for (i = 0; i < nseg; i++) { while ((bio == NULL) || (bio_add_page(bio, - blkbk->pending_page(pending_req, i), + pages[i], seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 9ad3b5ec1dc..ae7951f0e26 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -160,10 +161,22 @@ struct xen_vbd { sector_t size; bool flush_support; bool discard_secure; + + unsigned int feature_gnt_persistent:1; + unsigned int overflow_max_grants:1; }; struct backend_info; + +struct persistent_gnt { + struct page *page; + grant_ref_t gnt; + grant_handle_t handle; + uint64_t dev_bus_addr; + struct rb_node node; +}; + struct xen_blkif { /* Unique identifier for this interface. 
*/ domid_t domid; @@ -190,6 +203,10 @@ struct xen_blkif { struct task_struct *xenblkd; unsigned int waiting_reqs; + /* tree to store persistent grants */ + struct rb_root persistent_gnts; + unsigned int persistent_gnt_c; + /* statistics */ unsigned long st_print; int st_rd_req; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 4f66171c668..b2250265308 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -118,6 +118,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) atomic_set(&blkif->drain, 0); blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); + blkif->persistent_gnts.rb_node = NULL; return blkif; } @@ -673,6 +674,13 @@ again: xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); + err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/feature-persistent", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { @@ -721,6 +729,7 @@ static int connect_ring(struct backend_info *be) struct xenbus_device *dev = be->dev; unsigned long ring_ref; unsigned int evtchn; + unsigned int pers_grants; char protocol[64] = ""; int err; @@ -750,8 +759,18 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); + err = xenbus_gather(XBT_NIL, dev->otherend, + "feature-persistent-grants", "%u", + &pers_grants, NULL); + if (err) + pers_grants = 0; + + be->blkif->vbd.feature_gnt_persistent = pers_grants; + be->blkif->vbd.overflow_max_grants = 0; + + pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol, + pers_grants ? 
"persistent grants" : ""); /* Map the shared frame, irq etc. */ err = xen_blkif_map(be->blkif, ring_ref, evtchn); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 007db8986e8..911d733d21b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -64,10 +65,17 @@ enum blkif_state { BLKIF_STATE_SUSPENDED, }; +struct grant { + grant_ref_t gref; + unsigned long pfn; + struct llist_node node; +}; + struct blk_shadow { struct blkif_request req; struct request *request; unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; static DEFINE_MUTEX(blkfront_mutex); @@ -97,6 +105,8 @@ struct blkfront_info struct work_struct work; struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; + struct llist_head persistent_gnts; + unsigned int persistent_gnts_c; unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; @@ -104,6 +114,7 @@ struct blkfront_info unsigned int feature_secdiscard:1; unsigned int discard_granularity; unsigned int discard_alignment; + unsigned int feature_persistent:1; int is_ready; }; @@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req) unsigned long id; unsigned int fsect, lsect; int i, ref; + + /* + * Used to store if we are able to queue the request by just using + * existing persistent grants, or if we have to get new grants, + * as there are not sufficiently many free. 
+ */ + bool new_persistent_gnts; grant_ref_t gref_head; + struct page *granted_page; + struct grant *gnt_list_entry = NULL; struct scatterlist *sg; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; - if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { - gnttab_request_free_callback( - &info->callback, - blkif_restart_queue_callback, - info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); - return 1; - } + /* Check if we have enought grants to allocate a requests */ + if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) { + new_persistent_gnts = 1; + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c, + &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + } else + new_persistent_gnts = 0; /* Fill out a communications ring structure. */ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); @@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req) BLKIF_MAX_SEGMENTS_PER_REQUEST); for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { - buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; - /* install a grant reference. 
*/ - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); - gnttab_grant_foreign_access_ref( - ref, + if (info->persistent_gnts_c) { + BUG_ON(llist_empty(&info->persistent_gnts)); + gnt_list_entry = llist_entry( + llist_del_first(&info->persistent_gnts), + struct grant, node); + + ref = gnt_list_entry->gref; + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); + info->persistent_gnts_c--; + } else { + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); + + gnt_list_entry = + kmalloc(sizeof(struct grant), + GFP_ATOMIC); + if (!gnt_list_entry) + return -ENOMEM; + + granted_page = alloc_page(GFP_ATOMIC); + if (!granted_page) { + kfree(gnt_list_entry); + return -ENOMEM; + } + + gnt_list_entry->pfn = + page_to_pfn(granted_page); + gnt_list_entry->gref = ref; + + buffer_mfn = pfn_to_mfn(page_to_pfn( + granted_page)); + gnttab_grant_foreign_access_ref(ref, info->xbdev->otherend_id, - buffer_mfn, - rq_data_dir(req)); + buffer_mfn, 0); + } + + info->shadow[id].grants_used[i] = gnt_list_entry; + + if (rq_data_dir(req)) { + char *bvec_data; + void *shared_data; + + BUG_ON(sg->offset + sg->length > PAGE_SIZE); + + shared_data = kmap_atomic( + pfn_to_page(gnt_list_entry->pfn)); + bvec_data = kmap_atomic(sg_page(sg)); + + /* + * this does not wipe data stored outside the + * range sg->offset..sg->offset+sg->length. + * Therefore, blkback *could* see data from + * previous requests. This is OK as long as + * persistent grants are shared with just one + * domain. It may need refactoring if this + * changes + */ + memcpy(shared_data + sg->offset, + bvec_data + sg->offset, + sg->length); + + kunmap_atomic(bvec_data); + kunmap_atomic(shared_data); + } info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); ring_req->u.rw.seg[i] = @@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req) /* Keep a private copy so we can reissue requests when recovering. 
*/ info->shadow[id].req = *ring_req; - gnttab_free_grant_references(gref_head); + if (new_persistent_gnts) + gnttab_free_grant_references(gref_head); return 0; } @@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) static void xlvbd_flush(struct blkfront_info *info) { blk_queue_flush(info->rq, info->feature_flush); - printk(KERN_INFO "blkfront: %s: %s: %s\n", + printk(KERN_INFO "blkfront: %s: %s: %s %s\n", info->gd->disk_name, info->flush_op == BLKIF_OP_WRITE_BARRIER ? "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? "flush diskcache" : "barrier or flush"), - info->feature_flush ? "enabled" : "disabled"); + info->feature_flush ? "enabled" : "disabled", + info->feature_persistent ? "using persistent grants" : ""); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { + struct llist_node *all_gnts; + struct grant *persistent_gnt; + /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); info->connected = suspend ? @@ -714,6 +800,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* No more blkif_request(). */ if (info->rq) blk_stop_queue(info->rq); + + /* Remove all persistent grants */ + if (info->persistent_gnts_c) { + all_gnts = llist_del_all(&info->persistent_gnts); + llist_for_each_entry(persistent_gnt, all_gnts, node) { + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + kfree(persistent_gnt); + } + info->persistent_gnts_c = 0; + } + /* No more gnttab callback work. 
*/ gnttab_cancel_free_callback(&info->callback); spin_unlock_irq(&info->io_lock); @@ -734,13 +831,42 @@ static void blkif_free(struct blkfront_info *info, int suspend) } -static void blkif_completion(struct blk_shadow *s) +static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, + struct blkif_response *bret) { int i; - /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place - * flag. */ - for (i = 0; i < s->req.u.rw.nr_segments; i++) - gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); + struct bio_vec *bvec; + struct req_iterator iter; + unsigned long flags; + char *bvec_data; + void *shared_data; + unsigned int offset = 0; + + if (bret->operation == BLKIF_OP_READ) { + /* + * Copy the data received from the backend into the bvec. + * Since bv_offset can be different than 0, and bv_len different + * than PAGE_SIZE, we have to keep track of the current offset, + * to be sure we are copying the data from the right shared page. + */ + rq_for_each_segment(bvec, s->request, iter) { + BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); + i = offset >> PAGE_SHIFT; + shared_data = kmap_atomic( + pfn_to_page(s->grants_used[i]->pfn)); + bvec_data = bvec_kmap_irq(bvec, &flags); + memcpy(bvec_data, shared_data + bvec->bv_offset, + bvec->bv_len); + bvec_kunmap_irq(bvec_data, &flags); + kunmap_atomic(shared_data); + offset += bvec->bv_len; + } + } + /* Add the persistent grant into the list of free grants */ + for (i = 0; i < s->req.u.rw.nr_segments; i++) { + llist_add(&s->grants_used[i]->node, &info->persistent_gnts); + info->persistent_gnts_c++; + } } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -783,7 +909,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) req = info->shadow[id].request; if (bret->operation != BLKIF_OP_DISCARD) - blkif_completion(&info->shadow[id]); + blkif_completion(&info->shadow[id], info, bret); if (add_id_to_freelist(info, id)) { WARN(1, "%s: response to %s (id %ld) couldn't be 
recycled!\n", @@ -942,6 +1068,11 @@ again: message = "writing protocol"; goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, + "feature-persistent-grants", "%u", 1); + if (err) + dev_warn(&dev->dev, + "writing persistent grants feature to xenbus"); err = xenbus_transaction_end(xbt, 0); if (err) { @@ -1029,6 +1160,8 @@ static int blkfront_probe(struct xenbus_device *dev, spin_lock_init(&info->io_lock); info->xbdev = dev; info->vdevice = vdevice; + init_llist_head(&info->persistent_gnts); + info->persistent_gnts_c = 0; info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); @@ -1093,7 +1226,7 @@ static int blkif_recover(struct blkfront_info *info) req->u.rw.seg[j].gref, info->xbdev->otherend_id, pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), - rq_data_dir(info->shadow[req->u.rw.id].request)); + 0); } info->shadow[req->u.rw.id].req = *req; @@ -1225,7 +1358,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int binfo; int err; - int barrier, flush, discard; + int barrier, flush, discard, persistent; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -1303,6 +1436,14 @@ static void blkfront_connect(struct blkfront_info *info) if (!err && discard) blkfront_setup_discard(info); + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-persistent", "%u", &persistent, + NULL); + if (err) + info->feature_persistent = 0; + else + info->feature_persistent = persistent; + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", From 3b98c0c2093d1f92e5b7394ae0b13d142e7ef880 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 12:49:34 +0100 Subject: [PATCH 173/609] drbd: switch configuration interface from connector to genetlink Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 3 +- drivers/block/drbd/drbd_int.h | 36 +- 
drivers/block/drbd/drbd_main.c | 27 +- drivers/block/drbd/drbd_nl.c | 1540 +++++++++++++++--------------- drivers/block/drbd/drbd_state.c | 7 +- include/linux/drbd.h | 35 +- include/linux/genl_magic_func.h | 2 +- 7 files changed, 808 insertions(+), 842 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 7cd78617669..c1a90616776 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -702,6 +702,7 @@ static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); struct drbd_conf *mdev = w->mdev; + struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; if (!get_ldev(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) @@ -725,7 +726,7 @@ static int w_update_odbm(struct drbd_work *w, int unused) break; } } - drbd_bcast_sync_progress(mdev); + drbd_bcast_event(mdev, &sib); return 1; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e6875834464..429fd8da6b7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "drbd_state.h" @@ -65,7 +66,6 @@ extern unsigned int minor_count; extern int disable_sendpage; extern int allow_oos; -extern unsigned int cn_idx; #ifdef CONFIG_DRBD_FAULT_INJECTION extern int enable_faults; @@ -865,14 +865,6 @@ struct drbd_md { */ }; -/* for sync_conf and other types... 
*/ -#define NL_PACKET(name, number, fields) struct name { fields }; -#define NL_INTEGER(pn,pr,member) int member; -#define NL_INT64(pn,pr,member) __u64 member; -#define NL_BIT(pn,pr,member) unsigned member:1; -#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; -#include "linux/drbd_nl.h" - struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; @@ -1502,7 +1494,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, extern void drbd_free_mdev(struct drbd_conf *mdev); extern void drbd_delete_device(unsigned int minor); -struct drbd_tconn *drbd_new_tconn(char *name); +struct drbd_tconn *drbd_new_tconn(const char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); struct drbd_tconn *conn_by_name(const char *name); @@ -1679,16 +1671,22 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); - /* drbd_nl.c */ - -void drbd_nl_cleanup(void); -int __init drbd_nl_init(void); -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); -void drbd_bcast_sync_progress(struct drbd_conf *mdev); -void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *, - const char *, const struct drbd_peer_request *); - +/* state info broadcast */ +struct sib_info { + enum drbd_state_info_bcast_reason sib_reason; + union { + struct { + char *helper_name; + unsigned helper_exit_code; + }; + struct { + union drbd_state os; + union drbd_state ns; + }; + }; +}; +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib); /* * inline helper functions diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9f6db5947c6..9697ab87209 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -86,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!"); module_param(minor_count, uint, 
0444); module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); -module_param(cn_idx, uint, 0444); module_param(proc_details, int, 0644); #ifdef CONFIG_DRBD_FAULT_INJECTION @@ -108,7 +107,6 @@ module_param(fault_devs, int, 0644); unsigned int minor_count = DRBD_MINOR_COUNT_DEF; int disable_sendpage; int allow_oos; -unsigned int cn_idx = CN_IDX_DRBD; int proc_details; /* Detail level in proc drbd*/ /* Module parameter for setting the user mode helper program @@ -2175,7 +2173,7 @@ static void drbd_cleanup(void) if (drbd_proc) remove_proc_entry("drbd", NULL); - drbd_nl_cleanup(); + drbd_genl_unregister(); idr_for_each_entry(&minors, mdev, i) drbd_delete_device(i); @@ -2237,6 +2235,9 @@ struct drbd_tconn *conn_by_name(const char *name) { struct drbd_tconn *tconn; + if (!name || !name[0]) + return NULL; + write_lock_irq(&global_state_lock); list_for_each_entry(tconn, &drbd_tconns, all_tconn) { if (!strcmp(tconn->name, name)) @@ -2248,7 +2249,7 @@ found: return tconn; } -struct drbd_tconn *drbd_new_tconn(char *name) +struct drbd_tconn *drbd_new_tconn(const char *name) { struct drbd_tconn *tconn; @@ -2333,6 +2334,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, mdev->tconn = tconn; mdev->minor = minor; + mdev->vnr = vnr; drbd_init_set_defaults(mdev); @@ -2461,10 +2463,6 @@ int __init drbd_init(void) #endif } - err = drbd_nl_init(); - if (err) - return err; - err = register_blkdev(DRBD_MAJOR, "drbd"); if (err) { printk(KERN_ERR @@ -2473,6 +2471,13 @@ int __init drbd_init(void) return err; } + err = drbd_genl_register(); + if (err) { + printk(KERN_ERR "drbd: unable to register generic netlink family\n"); + goto fail; + } + + register_reboot_notifier(&drbd_notifier); /* @@ -2487,12 +2492,12 @@ int __init drbd_init(void) err = drbd_create_mempools(); if (err) - goto Enomem; + goto fail; drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: unable 
to register proc file\n"); - goto Enomem; + goto fail; } rwlock_init(&global_state_lock); @@ -2507,7 +2512,7 @@ int __init drbd_init(void) return 0; /* Success! */ -Enomem: +fail: drbd_cleanup(); if (err == -ENOMEM) /* currently always the case */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f2739fd188a..f9be14248e3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -29,110 +29,225 @@ #include #include #include -#include #include #include #include "drbd_int.h" #include "drbd_req.h" #include "drbd_wrappers.h" #include -#include #include -#include #include -static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); -static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); -static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *); +#include -/* see get_sb_bdev and bd_claim */ +/* .doit */ +// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info); +// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_invalidate(struct sk_buff *skb, 
struct genl_info *info); +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); +/* .dumpit */ +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); + +#include +#include + +/* used blkdev_get_by_path, to claim our meta data device(s) */ static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; -/* Generate the tag_list to struct functions */ -#define NL_PACKET(name, number, fields) \ -static int name ## _from_tags( \ - unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ -static int name ## _from_tags( \ - unsigned short *tags, struct name *arg) \ -{ \ - int tag; \ - int dlen; \ - \ - while ((tag = get_unaligned(tags++)) != TT_END) { \ - dlen = get_unaligned(tags++); \ - switch (tag_number(tag)) { \ - fields \ - default: \ - if (tag & T_MANDATORY) { \ - printk(KERN_ERR "drbd: Unknown tag: %d\n", tag_number(tag)); \ - return 0; \ - } \ - } \ - tags = (unsigned short *)((char *)tags + dlen); \ - } \ - return 1; \ -} -#define NL_INTEGER(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ - arg->member = get_unaligned((int *)(tags)); \ - break; -#define NL_INT64(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ - arg->member = get_unaligned((u64 *)(tags)); \ - break; -#define NL_BIT(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ - arg->member = *(char 
*)(tags) ? 1 : 0; \ - break; -#define NL_STRING(pn, pr, member, len) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ - if (dlen > len) { \ - printk(KERN_ERR "drbd: arg too long: %s (%u wanted, max len: %u bytes)\n", \ - #member, dlen, (unsigned int)len); \ - return 0; \ - } \ - arg->member ## _len = dlen; \ - memcpy(arg->member, tags, min_t(size_t, dlen, len)); \ - break; -#include "linux/drbd_nl.h" +/* Configuration is strictly serialized, because generic netlink message + * processing is strictly serialized by the genl_lock(). + * Which means we can use one static global drbd_config_context struct. + */ +static struct drbd_config_context { + /* assigned from drbd_genlmsghdr */ + unsigned int minor; + /* assigned from request attributes, if present */ + unsigned int volume; +#define VOLUME_UNSPECIFIED (-1U) + /* pointer into the request skb, + * limited lifetime! */ + char *conn_name; -/* Generate the struct to tag_list functions */ -#define NL_PACKET(name, number, fields) \ -static unsigned short* \ -name ## _to_tags( \ - struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ -static unsigned short* \ -name ## _to_tags( \ - struct name *arg, unsigned short *tags) \ -{ \ - fields \ - return tags; \ + /* reply buffer */ + struct sk_buff *reply_skb; + /* pointer into reply buffer */ + struct drbd_genlmsghdr *reply_dh; + /* resolved from attributes, if possible */ + struct drbd_conf *mdev; + struct drbd_tconn *tconn; +} adm_ctx; + +static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) +{ + genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); + if (genlmsg_reply(skb, info)) + printk(KERN_ERR "drbd: error sending genl reply\n"); } -#define NL_INTEGER(pn, pr, member) \ - put_unaligned(pn | pr | TT_INTEGER, tags++); \ - put_unaligned(sizeof(int), tags++); \ - put_unaligned(arg->member, (int *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(int)); -#define NL_INT64(pn, pr, member) \ - put_unaligned(pn | pr | 
TT_INT64, tags++); \ - put_unaligned(sizeof(u64), tags++); \ - put_unaligned(arg->member, (u64 *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(u64)); -#define NL_BIT(pn, pr, member) \ - put_unaligned(pn | pr | TT_BIT, tags++); \ - put_unaligned(sizeof(char), tags++); \ - *(char *)tags = arg->member; \ - tags = (unsigned short *)((char *)tags+sizeof(char)); -#define NL_STRING(pn, pr, member, len) \ - put_unaligned(pn | pr | TT_STRING, tags++); \ - put_unaligned(arg->member ## _len, tags++); \ - memcpy(tags, arg->member, arg->member ## _len); \ - tags = (unsigned short *)((char *)tags + arg->member ## _len); -#include "linux/drbd_nl.h" +/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only + * reason it could fail was no space in skb, and there are 4k available. */ +static int drbd_msg_put_info(const char *info) +{ + struct sk_buff *skb = adm_ctx.reply_skb; + struct nlattr *nla; + int err = -EMSGSIZE; -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name); -void drbd_nl_send_reply(struct cn_msg *, int); + if (!info || !info[0]) + return 0; + + nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + if (!nla) + return err; + + err = nla_put_string(skb, T_info_text, info); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } else + nla_nest_end(skb, nla); + return 0; +} + +/* This would be a good candidate for a "pre_doit" hook, + * and per-family private info->pointers. + * But we need to stay compatible with older kernels. + * If it returns successfully, adm_ctx members are valid. 
+ */ +#define DRBD_ADM_NEED_MINOR 1 +#define DRBD_ADM_NEED_CONN 2 +static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, + unsigned flags) +{ + struct drbd_genlmsghdr *d_in = info->userhdr; + const u8 cmd = info->genlhdr->cmd; + int err; + + memset(&adm_ctx, 0, sizeof(adm_ctx)); + + /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */ + if (cmd != DRBD_ADM_GET_STATUS + && security_netlink_recv(skb, CAP_SYS_ADMIN)) + return -EPERM; + + adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!adm_ctx.reply_skb) + goto fail; + + adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb, + info, &drbd_genl_family, 0, cmd); + /* put of a few bytes into a fresh skb of >= 4k will always succeed. + * but anyways */ + if (!adm_ctx.reply_dh) + goto fail; + + adm_ctx.reply_dh->minor = d_in->minor; + adm_ctx.reply_dh->ret_code = NO_ERROR; + + if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { + struct nlattr *nla; + /* parse and validate only */ + err = drbd_cfg_context_from_attrs(NULL, info->attrs); + if (err) + goto fail; + + /* It was present, and valid, + * copy it over to the reply skb. */ + err = nla_put_nohdr(adm_ctx.reply_skb, + info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len, + info->attrs[DRBD_NLA_CFG_CONTEXT]); + if (err) + goto fail; + + /* and assign stuff to the global adm_ctx */ + nla = nested_attr_tb[__nla_type(T_ctx_volume)]; + adm_ctx.volume = nla ? 
nla_get_u32(nla) : VOLUME_UNSPECIFIED; + nla = nested_attr_tb[__nla_type(T_ctx_conn_name)]; + if (nla) + adm_ctx.conn_name = nla_data(nla); + } else + adm_ctx.volume = VOLUME_UNSPECIFIED; + + adm_ctx.minor = d_in->minor; + adm_ctx.mdev = minor_to_mdev(d_in->minor); + adm_ctx.tconn = conn_by_name(adm_ctx.conn_name); + + if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { + drbd_msg_put_info("unknown minor"); + return ERR_MINOR_INVALID; + } + if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) { + drbd_msg_put_info("unknown connection"); + return ERR_INVALID_REQUEST; + } + + /* some more paranoia, if the request was over-determined */ + if (adm_ctx.mdev && + adm_ctx.volume != VOLUME_UNSPECIFIED && + adm_ctx.volume != adm_ctx.mdev->vnr) { + pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", + adm_ctx.minor, adm_ctx.volume, + adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("over-determined configuration context mismatch"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev && adm_ctx.tconn && + adm_ctx.mdev->tconn != adm_ctx.tconn) { + pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", + adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("over-determined configuration context mismatch"); + return ERR_INVALID_REQUEST; + } + return NO_ERROR; + +fail: + nlmsg_free(adm_ctx.reply_skb); + adm_ctx.reply_skb = NULL; + return -ENOMEM; +} + +static int drbd_adm_finish(struct genl_info *info, int retcode) +{ + struct nlattr *nla; + const char *conn_name = NULL; + + if (!adm_ctx.reply_skb) + return -ENOMEM; + + adm_ctx.reply_dh->ret_code = retcode; + + nla = info->attrs[DRBD_NLA_CFG_CONTEXT]; + if (nla) { + nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); + if (nla) + conn_name = nla_data(nla); + } + + drbd_adm_send_reply(adm_ctx.reply_skb, info); + return 0; +} int drbd_khelper(struct drbd_conf *mdev, char *cmd) { @@ -142,9 +257,9 @@ int drbd_khelper(struct 
drbd_conf *mdev, char *cmd) NULL, /* Will be set to address family */ NULL, /* Will be set to address */ NULL }; - char mb[12], af[20], ad[60], *afs; char *argv[] = {usermode_helper, cmd, mb, NULL }; + struct sib_info sib; int ret; snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); @@ -177,8 +292,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) drbd_md_sync(mdev); dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); - - drbd_bcast_ev_helper(mdev, cmd); + sib.sib_reason = SIB_HELPER_PRE; + sib.helper_name = cmd; + drbd_bcast_event(mdev, &sib); ret = call_usermodehelper(usermode_helper, argv, envp, 1); if (ret) dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", @@ -188,6 +304,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); + sib.sib_reason = SIB_HELPER_POST; + sib.helper_exit_code = ret; + drbd_bcast_event(mdev, &sib); if (ret < 0) /* Ignore any ERRNOs we got. 
*/ ret = 0; @@ -362,7 +481,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } if (rv == SS_NOTHING_TO_DO) - goto fail; + goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { nps = drbd_try_outdate_peer(mdev); @@ -388,13 +507,13 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) rv = _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_WAIT_COMPLETE); if (rv < SS_SUCCESS) - goto fail; + goto out; } break; } if (rv < SS_SUCCESS) - goto fail; + goto out; if (forced) dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); @@ -438,33 +557,46 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) drbd_md_sync(mdev); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); - fail: +out: mutex_unlock(mdev->state_mutex); return rv; } -static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static const char *from_attrs_err_to_txt(int err) { - struct primary primary_args; - - memset(&primary_args, 0, sizeof(struct primary)); - if (!primary_from_tags(nlp->tag_list, &primary_args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = - drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force); - - return 0; + return err == -ENOMSG ? "required attribute missing" : + err == -EOPNOTSUPP ? 
"unknown mandatory attribute" : + "invalid attribute value"; } -static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); + struct set_role_parms parms; + int err; + enum drbd_ret_code retcode; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { + err = set_role_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) + retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate); + else + retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0); +out: + drbd_adm_finish(info, retcode); return 0; } @@ -541,6 +673,12 @@ char *ppsize(char *buf, unsigned long long size) * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: * peer may not initiate a resize. */ +/* Note these are not to be confused with + * drbd_adm_suspend_io/drbd_adm_resume_io, + * which are (sub) state changes triggered by admin (drbdsetup), + * and can be long lived. + * This changes an mdev->flag, is triggered by drbd internals, + * and should be short-lived. 
*/ void drbd_suspend_io(struct drbd_conf *mdev) { set_bit(SUSPEND_IO, &mdev->flags); @@ -881,11 +1019,10 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } -/* does always return 0; - * interesting return code is in reply->ret_code */ -static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; + int err; enum drbd_ret_code retcode; enum determine_dev_size dd; sector_t max_possible_sectors; @@ -897,6 +1034,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp enum drbd_state_rv rv; int cp_discovered = 0; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + + mdev = adm_ctx.mdev; conn_reconfig_start(mdev->tconn); /* if you want to reconfigure, please tear down first */ @@ -910,7 +1054,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * to realize a "hot spare" feature (not that I'd recommend that) */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); - /* allocation not in the IO path, cqueue thread context */ + /* allocation not in the IO path, drbdsetup context */ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { retcode = ERR_NOMEM; @@ -922,12 +1066,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->dc.fencing = DRBD_FENCING_DEF; nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - if (!disk_conf_from_tags(nlp->tag_list, &nbc->dc)) { + err = disk_conf_from_attrs(&nbc->dc, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto 
fail; } @@ -961,7 +1107,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp */ bdev = blkdev_get_by_path(nbc->dc.meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - (nbc->dc.meta_dev_idx < 0) ? + ((int)nbc->dc.meta_dev_idx < 0) ? (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, @@ -997,7 +1143,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - if (nbc->dc.meta_dev_idx < 0) { + if ((int)nbc->dc.meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); @@ -1028,7 +1174,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - if (nbc->dc.meta_dev_idx >= 0) + if ((int)nbc->dc.meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } @@ -1242,8 +1388,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); put_ldev(mdev); - reply->ret_code = retcode; conn_reconfig_done(mdev->tconn); + drbd_adm_finish(info, retcode); return 0; force_diskless_dec: @@ -1251,6 +1397,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp force_diskless: drbd_force_state(mdev, NS(disk, D_FAILED)); drbd_md_sync(mdev); + conn_reconfig_done(mdev->tconn); fail: if (nbc) { if (nbc->backing_bdev) @@ -1263,8 +1410,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } lc_destroy(resync_lru); - reply->ret_code = retcode; - conn_reconfig_done(mdev->tconn); + drbd_adm_finish(info, retcode); return 0; } @@ -1273,42 +1419,54 @@ static int drbd_nl_disk_conf(struct drbd_conf 
*mdev, struct drbd_nl_cfg_req *nlp * Then we transition to D_DISKLESS, and wait for put_ldev() to return all * internal references as well. * Only then we have finally detached. */ -static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; enum drbd_ret_code retcode; - int ret; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); - /* D_FAILED will transition to DISKLESS. */ - ret = wait_event_interruptible(mdev->misc_wait, - mdev->state.disk != D_FAILED); + retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); + wait_event(mdev->misc_wait, + mdev->state.disk != D_DISKLESS || + !atomic_read(&mdev->local_cnt)); drbd_resume_io(mdev); - if ((int)retcode == (int)SS_IS_DISKLESS) - retcode = SS_NOTHING_TO_DO; - if (ret) - retcode = ERR_INTR; - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { - int i; - enum drbd_ret_code retcode; + char hmac_name[CRYPTO_MAX_ALG_NAME]; + struct drbd_conf *mdev; struct net_conf *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - struct drbd_conf *mdev; - char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; struct drbd_tconn *oconn; + struct drbd_tconn *tconn; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; + enum drbd_ret_code retcode; + int i; + int err; + retcode = 
drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tconn = adm_ctx.tconn; conn_reconfig_start(tconn); if (tconn->cstate > C_STANDALONE) { @@ -1343,8 +1501,10 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - if (!net_conf_from_tags(nlp->tag_list, new_conf)) { + err = net_conf_from_attrs(new_conf, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } @@ -1495,8 +1655,8 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl mdev->recv_cnt = 0; kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); } - reply->ret_code = retcode; conn_reconfig_done(tconn); + drbd_adm_finish(info, retcode); return 0; fail: @@ -1508,24 +1668,37 @@ fail: crypto_free_hash(integrity_r_tfm); kfree(new_conf); - reply->ret_code = retcode; conn_reconfig_done(tconn); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) { - int retcode; - struct disconnect dc; + struct disconnect_parms parms; + struct drbd_tconn *tconn; + enum drbd_ret_code retcode; + int err; - memset(&dc, 0, sizeof(struct disconnect)); - if (!disconnect_from_tags(nlp->tag_list, &dc)) { - retcode = ERR_MANDATORY_TAG; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) goto fail; + + tconn = adm_ctx.tconn; + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { + err = disconnect_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + 
} } - if (dc.force) { + if (parms.force_disconnect) { spin_lock_irq(&tconn->req_lock); if (tconn->cstate >= C_WF_CONNECTION) _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); @@ -1567,7 +1740,7 @@ static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req * done: retcode = NO_ERROR; fail: - reply->ret_code = retcode; + drbd_adm_finish(info, retcode); return 0; } @@ -1587,20 +1760,32 @@ void resync_after_online_grow(struct drbd_conf *mdev) _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); } -static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) { - struct resize rs; - int retcode = NO_ERROR; + struct resize_parms rs; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; enum determine_dev_size dd; enum dds_flags ddsf; + int err; - memset(&rs, 0, sizeof(struct resize)); - if (!resize_from_tags(nlp->tag_list, &rs)) { - retcode = ERR_MANDATORY_TAG; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) goto fail; + + memset(&rs, 0, sizeof(struct resize_parms)); + if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { + err = resize_parms_from_attrs(&rs, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } } + mdev = adm_ctx.mdev; if (mdev->state.conn > C_CONNECTED) { retcode = ERR_RESIZE_RESYNC; goto fail; @@ -1644,14 +1829,14 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } fail: - reply->ret_code = retcode; + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + enum drbd_ret_code 
retcode; int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ @@ -1662,12 +1847,21 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n int *rs_plan_s = NULL; int fifo_size; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + mdev = adm_ctx.mdev; + if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { retcode = ERR_NOMEM; + drbd_msg_put_info("unable to allocate cpumask"); goto fail; } - if (nlp->flags & DRBD_NL_SET_DEFAULTS) { + if (((struct drbd_genlmsghdr*)info->userhdr)->flags + & DRBD_GENL_F_SET_DEFAULTS) { memset(&sc, 0, sizeof(struct syncer_conf)); sc.rate = DRBD_RATE_DEF; sc.after = DRBD_AFTER_DEF; @@ -1681,8 +1875,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } else memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); - if (!syncer_conf_from_tags(nlp->tag_list, &sc)) { + err = syncer_conf_from_attrs(&sc, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } @@ -1832,14 +2028,23 @@ fail: free_cpumask_var(new_cpu_mask); crypto_free_hash(csums_tfm); crypto_free_hash(verify_tfm); - reply->ret_code = retcode; + + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) { - int retcode; + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. 
*/ @@ -1862,7 +2067,8 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } @@ -1875,56 +2081,58 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev) return rv; } -static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, + union drbd_state mask, union drbd_state val) { - int retcode; + enum drbd_ret_code retcode; - /* If there is still bitmap IO pending, probably because of a previous - * resync just being finished, wait for it before requesting a new resync. */ - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); - - if (retcode < SS_SUCCESS) { - if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) { - /* The peer will get a resync upon connect anyways. Just make that - into a full resync. 
*/ - retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); - if (retcode >= SS_SUCCESS) { - if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, - "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) - retcode = ERR_IO_MD_DISK; - } - } else - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); - } - - reply->ret_code = retcode; + retcode = drbd_request_state(adm_ctx.mdev, mask, val); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S)); +} - if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) retcode = ERR_PAUSE_IS_SET; - - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; union drbd_state s; + enum drbd_ret_code retcode; - if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { - s = mdev->state; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { + s = adm_ctx.mdev->state; if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { retcode = s.aftr_isp ? 
ERR_PIC_AFTER_DEP : s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; @@ -1933,28 +2141,35 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(susp, 1)); - - return 0; + return drbd_adm_simple_request_state(skb, info, NS(susp, 1)); } -static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } drbd_suspend_io(mdev); - reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); - if (reply->ret_code == SS_SUCCESS) { + retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); + if (retcode == SS_SUCCESS) { if (mdev->state.conn < C_CONNECTED) tl_clear(mdev->tconn); if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) @@ -1962,138 +2177,259 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } drbd_resume_io(mdev); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); - return 0; + return 
drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, + const struct sib_info *sib) { - unsigned short *tl; + struct state_info *si = NULL; /* for sizeof(si->member); */ + struct nlattr *nla; + int got_ldev; + int got_net; + int err = 0; + int exclude_sensitive; - tl = reply->tag_list; + /* If sib != NULL, this is drbd_bcast_event, which anyone can listen + * to. So we better exclude_sensitive information. + * + * If sib == NULL, this is drbd_adm_get_status, executed synchronously + * in the context of the requesting user process. Exclude sensitive + * information, unless current has superuser. + * + * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and + * relies on the current implementation of netlink_dump(), which + * executes the dump callback successively from netlink_recvmsg(), + * always in the context of the receiving process */ + exclude_sensitive = sib || !capable(CAP_SYS_ADMIN); - if (get_ldev(mdev)) { - tl = disk_conf_to_tags(&mdev->ldev->dc, tl); - put_ldev(mdev); - } + got_ldev = get_ldev(mdev); + got_net = get_net_conf(mdev->tconn); - if (get_net_conf(mdev->tconn)) { - tl = net_conf_to_tags(mdev->tconn->net_conf, tl); - put_net_conf(mdev->tconn); - } - tl = syncer_conf_to_tags(&mdev->sync_conf, tl); + /* We need to add connection name and volume number information still. + * Minor number is in drbd_genlmsghdr. 
*/ + nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); + if (!nla) + goto nla_put_failure; + NLA_PUT_U32(skb, T_ctx_volume, mdev->vnr); + NLA_PUT_STRING(skb, T_ctx_conn_name, mdev->tconn->name); + nla_nest_end(skb, nla); - put_unaligned(TT_END, tl++); /* Close the tag list */ + if (got_ldev) + if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) + goto nla_put_failure; + if (got_net) + if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) + goto nla_put_failure; - return (int)((char *)tl - (char *)reply->tag_list); -} + if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive)) + goto nla_put_failure; -static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) -{ - unsigned short *tl = reply->tag_list; - union drbd_state s = mdev->state; - unsigned long rs_left; - unsigned int res; + nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); + if (!nla) + goto nla_put_failure; + NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY); + NLA_PUT_U32(skb, T_current_state, mdev->state.i); + NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid); + NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)); - tl = get_state_to_tags((struct get_state *)&s, tl); - - /* no local ref, no bitmap, no syncer progress. 
*/ - if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { - if (get_ldev(mdev)) { - drbd_get_syncer_progress(mdev, &rs_left, &res); - tl = tl_add_int(tl, T_sync_progress, &res); - put_ldev(mdev); + if (got_ldev) { + NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags); + NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); + NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev)); + NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev)); + if (C_SYNC_SOURCE <= mdev->state.conn && + C_PAUSED_SYNC_T >= mdev->state.conn) { + NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total); + NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed); } } - put_unaligned(TT_END, tl++); /* Close the tag list */ - return (int)((char *)tl - (char *)reply->tag_list); -} + if (sib) { + switch(sib->sib_reason) { + case SIB_SYNC_PROGRESS: + case SIB_GET_STATUS_REPLY: + break; + case SIB_STATE_CHANGE: + NLA_PUT_U32(skb, T_prev_state, sib->os.i); + NLA_PUT_U32(skb, T_new_state, sib->ns.i); + break; + case SIB_HELPER_POST: + NLA_PUT_U32(skb, + T_helper_exit_code, sib->helper_exit_code); + /* fall through */ + case SIB_HELPER_PRE: + NLA_PUT_STRING(skb, T_helper, sib->helper_name); + break; + } + } + nla_nest_end(skb, nla); -static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) -{ - unsigned short *tl; - - tl = reply->tag_list; - - if (get_ldev(mdev)) { - tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); - tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); + if (0) +nla_put_failure: + err = -EMSGSIZE; + if (got_ldev) put_ldev(mdev); - } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - return (int)((char *)tl - (char *)reply->tag_list); + if (got_net) + put_net_conf(mdev->tconn); + return err; } -/** - * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use - * @mdev: DRBD device. 
- * @nlp: Netlink/connector packet from drbdsetup - * @reply: Reply packet for drbdsetup - */ -static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) { - unsigned short *tl; - char rv; + enum drbd_ret_code retcode; + int err; - tl = reply->tag_list; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : - test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT; - - tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - return (int)((char *)tl - (char *)reply->tag_list); + err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; + } +out: + drbd_adm_finish(info, retcode); + return 0; } -static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { - /* default to resume from last known position, if possible */ - struct start_ov args = - { .start_sector = mdev->ov_start_sector }; + struct drbd_conf *mdev; + struct drbd_genlmsghdr *dh; + int minor = cb->args[0]; - if (!start_ov_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; + /* Open coded deferred single idr_for_each_entry iteration. + * This may miss entries inserted after this dump started, + * or entries deleted before they are reached. + * But we need to make sure the mdev won't disappear while + * we are looking at it. 
*/ + + rcu_read_lock(); + mdev = idr_get_next(&minors, &minor); + if (mdev) { + dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, &drbd_genl_family, + NLM_F_MULTI, DRBD_ADM_GET_STATUS); + if (!dh) + goto errout; + + D_ASSERT(mdev->minor == minor); + + dh->minor = minor; + dh->ret_code = NO_ERROR; + + if (nla_put_status_info(skb, mdev, NULL)) { + genlmsg_cancel(skb, dh); + goto errout; + } + genlmsg_end(skb, dh); + } + +errout: + rcu_read_unlock(); + /* where to start idr_get_next with the next iteration */ + cb->args[0] = minor+1; + + /* No more minors found: empty skb. Which will terminate the dump. */ + return skb->len; +} + +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct timeout_parms tp; + int err; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tp.timeout_type = + adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : + test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? 
UT_DEGRADED : + UT_DEFAULT; + + err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; } +out: + drbd_adm_finish(info, retcode); + return 0; +} +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) +{ + struct drbd_conf *mdev; + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + if (info->attrs[DRBD_NLA_START_OV_PARMS]) { + /* resume from last known position, if possible */ + struct start_ov_parms parms = + { .ov_start_sector = mdev->ov_start_sector }; + int err = start_ov_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; + } /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. 
*/ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); - - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; - reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; int skip_initial_sync = 0; int err; + struct new_c_uuid_parms args; - struct new_c_uuid args; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out_nolock; - memset(&args, 0, sizeof(struct new_c_uuid)); - if (!new_c_uuid_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; + mdev = adm_ctx.mdev; + memset(&args, 0, sizeof(args)); + if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { + err = new_c_uuid_parms_from_attrs(&args, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out_nolock; + } } mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. 
*/ @@ -2139,510 +2475,164 @@ out_dec: put_ldev(mdev); out: mutex_unlock(mdev->state_mutex); - - reply->ret_code = retcode; +out_nolock: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +static enum drbd_ret_code +drbd_check_conn_name(const char *name) { - struct new_connection args; + if (!name || !name[0]) { + drbd_msg_put_info("connection name missing"); + return ERR_MANDATORY_TAG; + } + /* if we want to use these in sysfs/configfs/debugfs some day, + * we must not allow slashes */ + if (strchr(name, '/')) { + drbd_msg_put_info("invalid connection name"); + return ERR_INVALID_REQUEST; + } + return NO_ERROR; +} - if (!new_connection_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; +int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + retcode = drbd_check_conn_name(adm_ctx.conn_name); + if (retcode != NO_ERROR) + goto out; + + if (adm_ctx.tconn) { + retcode = ERR_INVALID_REQUEST; + drbd_msg_put_info("connection exists"); + goto out; } - reply->ret_code = NO_ERROR; - if (!drbd_new_tconn(args.name)) - reply->ret_code = ERR_NOMEM; - + if (!drbd_new_tconn(adm_ctx.conn_name)) + retcode = ERR_NOMEM; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_new_minor(struct drbd_tconn *tconn, - struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) { - struct new_minor args; + struct drbd_genlmsghdr *dh = info->userhdr; + enum drbd_ret_code retcode; - args.vol_nr = 0; - args.minor = 0; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - if (!new_minor_from_tags(nlp->tag_list, &args)) 
{ - reply->ret_code = ERR_MANDATORY_TAG; - return 0; + /* FIXME drop minor_count parameter, limit to MINORMASK */ + if (dh->minor >= minor_count) { + drbd_msg_put_info("requested minor out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + /* FIXME we need a define here */ + if (adm_ctx.volume >= 256) { + drbd_msg_put_info("requested volume id out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; } - reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); - + retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; if (mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { drbd_delete_device(mdev_to_minor(mdev)); - reply->ret_code = NO_ERROR; - } else { - reply->ret_code = ERR_MINOR_CONFIGURED; - } + retcode = NO_ERROR; + } else + retcode = ERR_MINOR_CONFIGURED; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_del_conn(struct drbd_tconn *tconn, - struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) { - if (conn_lowest_minor(tconn) < 0) { - drbd_free_tconn(tconn); - reply->ret_code = NO_ERROR; + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + drbd_free_tconn(adm_ctx.tconn); + retcode = NO_ERROR; } else { - reply->ret_code = 
ERR_CONN_IN_USE; + retcode = ERR_CONN_IN_USE; } +out: + drbd_adm_finish(info, retcode); return 0; } -enum cn_handler_type { - CHT_MINOR, - CHT_CONN, - CHT_CTOR, - /* CHT_RES, later */ -}; -struct cn_handler_struct { - enum cn_handler_type type; - union { - int (*minor_based)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - int (*conn_based)(struct drbd_tconn *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - int (*constructor)(struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - }; - int reply_body_size; -}; - -static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, - [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, - [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, - [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, - [ P_net_conf ] = { CHT_CONN, { .conn_based = &drbd_nl_net_conf }, 0 }, - [ P_disconnect ] = { CHT_CONN, { .conn_based = &drbd_nl_disconnect }, 0 }, - [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, - [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, - [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, - [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, - [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, - [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, - [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, - [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, - [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, - [ P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, - sizeof(struct syncer_conf_tag_len_struct) + - sizeof(struct disk_conf_tag_len_struct) + - sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, - sizeof(struct get_state_tag_len_struct) + - sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, - sizeof(struct 
get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, - sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, - [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, - [ P_new_connection ] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, - [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, - [ P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, - [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, -}; - -static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) { - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; - struct cn_handler_struct *cm; - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - struct drbd_conf *mdev; - struct drbd_tconn *tconn; - int retcode, rr; - int reply_size = sizeof(struct cn_msg) - + sizeof(struct drbd_nl_cfg_reply) - + sizeof(short int); + static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */ + struct sk_buff *msg; + struct drbd_genlmsghdr *d_out; + unsigned seq; + int err = -ENOMEM; - if (!try_module_get(THIS_MODULE)) { - printk(KERN_ERR "drbd: try_module_get() failed!\n"); - return; - } + seq = atomic_inc_return(&drbd_genl_seq); + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); + if (!msg) + goto failed; - if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) { - retcode = ERR_PERM; - goto fail; - } + err = -EMSGSIZE; + d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT); + if (!d_out) /* cannot happen, but anyways. 
*/ + goto nla_put_failure; + d_out->minor = mdev_to_minor(mdev); + d_out->ret_code = 0; - if (nlp->packet_type >= P_nl_after_last_packet || - nlp->packet_type == P_return_code_only) { - retcode = ERR_PACKET_NR; - goto fail; - } + if (nla_put_status_info(msg, mdev, sib)) + goto nla_put_failure; + genlmsg_end(msg, d_out); + err = drbd_genl_multicast_events(msg, 0); + /* msg has been consumed or freed in netlink_broadcast() */ + if (err && err != -ESRCH) + goto failed; - cm = cnd_table + nlp->packet_type; - - /* This may happen if packet number is 0: */ - if (cm->minor_based == NULL) { - retcode = ERR_PACKET_NR; - goto fail; - } - - reply_size += cm->reply_body_size; - - /* allocation not in the IO path, cqueue thread context */ - cn_reply = kzalloc(reply_size, GFP_KERNEL); - if (!cn_reply) { - retcode = ERR_NOMEM; - goto fail; - } - reply = (struct drbd_nl_cfg_reply *) cn_reply->data; - - reply->packet_type = - cm->reply_body_size ? nlp->packet_type : P_return_code_only; - reply->minor = nlp->drbd_minor; - reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ - /* reply->tag_list; might be modified by cm->function. 
*/ - - retcode = ERR_MINOR_INVALID; - rr = 0; - switch (cm->type) { - case CHT_MINOR: - mdev = minor_to_mdev(nlp->drbd_minor); - if (!mdev) - goto fail; - rr = cm->minor_based(mdev, nlp, reply); - break; - case CHT_CONN: - tconn = conn_by_name(nlp->obj_name); - if (!tconn) { - retcode = ERR_CONN_NOT_KNOWN; - goto fail; - } - rr = cm->conn_based(tconn, nlp, reply); - break; - case CHT_CTOR: - rr = cm->constructor(nlp, reply); - break; - /* case CHT_RES: */ - } - - cn_reply->id = req->id; - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; - cn_reply->flags = 0; - - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); - - kfree(cn_reply); - module_put(THIS_MODULE); return; - fail: - drbd_nl_send_reply(req, retcode); - module_put(THIS_MODULE); + +nla_put_failure: + nlmsg_free(msg); +failed: + dev_err(DEV, "Error %d while broadcasting event. " + "Event seq:%u sib_reason:%u\n", + err, seq, sib->sib_reason); } - -static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ - -static unsigned short * -__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, - unsigned short len, int nul_terminated) -{ - unsigned short l = tag_descriptions[tag_number(tag)].max_len; - len = (len < l) ? 
len : l; - put_unaligned(tag, tl++); - put_unaligned(len, tl++); - memcpy(tl, data, len); - tl = (unsigned short*)((char*)tl + len); - if (nul_terminated) - *((char*)tl - 1) = 0; - return tl; -} - -static unsigned short * -tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len) -{ - return __tl_add_blob(tl, tag, data, len, 0); -} - -static unsigned short * -tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) -{ - return __tl_add_blob(tl, tag, str, strlen(str)+1, 0); -} - -static unsigned short * -tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) -{ - put_unaligned(tag, tl++); - switch(tag_type(tag)) { - case TT_INTEGER: - put_unaligned(sizeof(int), tl++); - put_unaligned(*(int *)val, (int *)tl); - tl = (unsigned short*)((char*)tl+sizeof(int)); - break; - case TT_INT64: - put_unaligned(sizeof(u64), tl++); - put_unaligned(*(u64 *)val, (u64 *)tl); - tl = (unsigned short*)((char*)tl+sizeof(u64)); - break; - default: - /* someone did something stupid. */ - ; - } - return tl; -} - -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct get_state_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = get_state_to_tags((struct get_state *)&state, tl); - - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_get_state; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); -} - -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct call_helper_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = tl_add_str(tl, T_helper, helper_name); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. */ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_call_helper; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); -} - -void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, - const char *seen_hash, const char *calc_hash, - const struct drbd_peer_request *peer_req) -{ - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - unsigned short *tl; - struct page *page; - unsigned len; - - if (!peer_req) - return; - if (!reason || !reason[0]) - return; - - /* apparently we have to memcpy twice, first to prepare the data for the - * struct cn_msg, then within cn_netlink_send from the cn_msg to the - * netlink skb. */ - /* receiver thread context, which is not in the writeout path (of this node), - * but may be in the writeout path of the _other_ node. 
- * GFP_NOIO to avoid potential "distributed deadlock". */ - cn_reply = kzalloc( - sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct dump_ee_tag_len_struct)+ - sizeof(short int), - GFP_NOIO); - - if (!cn_reply) { - dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, " - "sector %llu, size %u\n", - (unsigned long long)peer_req->i.sector, - peer_req->i.size); - return; - } - - reply = (struct drbd_nl_cfg_reply*)cn_reply->data; - tl = reply->tag_list; - - tl = tl_add_str(tl, T_dump_ee_reason, reason); - tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); - tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, &peer_req->i.sector); - tl = tl_add_int(tl, T_ee_block_id, &peer_req->block_id); - - /* dump the first 32k */ - len = min_t(unsigned, peer_req->i.size, 32 << 10); - put_unaligned(T_ee_data, tl++); - put_unaligned(len, tl++); - - page = peer_req->pages; - page_chain_for_each(page) { - void *d = kmap_atomic(page, KM_USER0); - unsigned l = min_t(unsigned, len, PAGE_SIZE); - memcpy(tl, d, l); - kunmap_atomic(d, KM_USER0); - tl = (unsigned short*)((char*)tl + l); - len -= l; - if (len == 0) - break; - } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; // not used here. 
- cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char*)tl - (char*)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_dump_ee; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - kfree(cn_reply); -} - -void drbd_bcast_sync_progress(struct drbd_conf *mdev) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct sync_progress_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - unsigned long rs_left; - unsigned int res; - - /* no local ref, no bitmap, no syncer progress, no broadcast. */ - if (!get_ldev(mdev)) - return; - drbd_get_syncer_progress(mdev, &rs_left, &res); - put_ldev(mdev); - - tl = tl_add_int(tl, T_sync_progress, &res); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_sync_progress; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); -} - -int __init drbd_nl_init(void) -{ - static struct cb_id cn_id_drbd; - int err, try=10; - - cn_id_drbd.val = CN_VAL_DRBD; - do { - cn_id_drbd.idx = cn_idx; - err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback); - if (!err) - break; - cn_idx = (cn_idx + CN_IDX_STEP); - } while (try--); - - if (err) { - printk(KERN_ERR "drbd: cn_drbd failed to register\n"); - return err; - } - - return 0; -} - -void drbd_nl_cleanup(void) -{ - static struct cb_id cn_id_drbd; - - cn_id_drbd.idx = cn_idx; - cn_id_drbd.val = CN_VAL_DRBD; - - cn_del_callback(&cn_id_drbd); -} - -void drbd_nl_send_reply(struct cn_msg *req, int ret_code) -{ - char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - int rr; - - memset(buffer, 0, sizeof(buffer)); - cn_reply->id = req->id; - - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply); - cn_reply->flags = 0; - - reply->packet_type = P_return_code_only; - reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; - reply->ret_code = ret_code; - - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); -} - diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ffee90d6d37..a280bc238ac 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -970,6 +970,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, enum drbd_fencing_p fp; enum drbd_req_event what = NOTHING; union 
drbd_state nsm = (union drbd_state){ .i = -1 }; + struct sib_info sib; + + sib.sib_reason = SIB_STATE_CHANGE; + sib.os = os; + sib.ns = ns; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { clear_bit(CRASHED_PRIMARY, &mdev->flags); @@ -984,7 +989,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } /* Inform userspace about the change... */ - drbd_bcast_state(mdev, ns); + drbd_bcast_event(mdev, &sib); if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index e192167e614..d28fdd8fcd4 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -51,7 +51,6 @@ #endif - extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 @@ -159,6 +158,7 @@ enum drbd_ret_code { ERR_CONN_IN_USE = 159, ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, + ERR_INVALID_REQUEST = 162, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -349,37 +349,4 @@ enum drbd_timeout_flag { #define DRBD_MD_INDEX_FLEX_EXT -2 #define DRBD_MD_INDEX_FLEX_INT -3 -/* Start of the new netlink/connector stuff */ - -enum drbd_ncr_flags { - DRBD_NL_CREATE_DEVICE = 0x01, - DRBD_NL_SET_DEFAULTS = 0x02, -}; -#define DRBD_NL_OBJ_NAME_LEN 32 - - -/* For searching a vacant cn_idx value */ -#define CN_IDX_STEP 6977 - -struct drbd_nl_cfg_req { - int packet_type; - union { - struct { - unsigned int drbd_minor; - enum drbd_ncr_flags flags; - }; - struct { - char obj_name[DRBD_NL_OBJ_NAME_LEN]; - }; - }; - unsigned short tag_list[]; -}; - -struct drbd_nl_cfg_reply { - int packet_type; - unsigned int minor; - int ret_code; /* enum ret_code or set_st_err_t */ - unsigned short tag_list[]; /* only used with get_* calls */ -}; - #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 8a86f659d36..c8c67239f61 100644 --- 
a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -95,7 +95,7 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ #endif #endif -#if 1 +#ifdef GENL_MAGIC_DEBUG static void dprint_field(const char *dir, int nla_type, const char *name, void *valp) { From 8432b31457bee1f32cd2ede82e4a68b1af1d5225 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 8 Mar 2011 16:11:16 +0100 Subject: [PATCH 174/609] drbd: allow holes in minor and volume id allocation s/idr_get_new/idr_get_new_above/ Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 39 +++++++++++++++++----------------- drivers/block/drbd/drbd_nl.c | 2 +- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 429fd8da6b7..ea638ce48e8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1509,6 +1509,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ +extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9697ab87209..24c712b91fb 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2322,6 +2322,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, struct request_queue *q; int vnr_got = vnr; int minor_got = minor; + enum drbd_ret_code err = ERR_NOMEM; mdev = minor_to_mdev(minor); if (mdev) @@ -2389,35 +2390,35 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) - goto out_no_vol_idr; - if (idr_get_new(&tconn->volumes, mdev, 
&vnr_got)) - goto out_no_vol_idr; - if (vnr_got != vnr) { - dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); - goto out_idr_remove_vol; + if (!idr_pre_get(&minors, GFP_KERNEL)) + goto out_no_minor_idr; + if (idr_get_new_above(&minors, mdev, minor, &minor_got)) + goto out_no_minor_idr; + if (minor_got != minor) { + err = ERR_MINOR_EXISTS; + drbd_msg_put_info("requested minor exists already"); + goto out_idr_remove_minor; } - if (!idr_pre_get(&minors, GFP_KERNEL)) - goto out_idr_remove_vol; - if (idr_get_new(&minors, mdev, &minor_got)) - goto out_idr_remove_vol; - if (minor_got != minor) { - /* minor exists, or other idr strangeness? */ - dev_err(DEV, "available minor (%d) != requested minor (%d)\n", - minor_got, minor); + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) goto out_idr_remove_minor; + if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got)) + goto out_idr_remove_minor; + if (vnr_got != vnr) { + err = ERR_INVALID_REQUEST; + drbd_msg_put_info("requested volume exists already"); + goto out_idr_remove_vol; } add_disk(disk); return NO_ERROR; -out_idr_remove_minor: - idr_remove(&minors, minor_got); out_idr_remove_vol: idr_remove(&tconn->volumes, vnr_got); +out_idr_remove_minor: + idr_remove(&minors, minor_got); synchronize_rcu(); -out_no_vol_idr: +out_no_minor_idr: kfree(mdev->current_epoch); out_no_epoch: drbd_bm_cleanup(mdev); @@ -2429,7 +2430,7 @@ out_no_disk: blk_cleanup_queue(q); out_no_q: kfree(mdev); - return ERR_NOMEM; + return err; } /* counterpart of drbd_new_device. diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f9be14248e3..f54d512ffce 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -109,7 +109,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only * reason it could fail was no space in skb, and there are 4k available. 
*/ -static int drbd_msg_put_info(const char *info) +int drbd_msg_put_info(const char *info) { struct sk_buff *skb = adm_ctx.reply_skb; struct nlattr *nla; From 73d901b74f1070c8a664349b564ba6f8bc8ab283 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:38:56 +0100 Subject: [PATCH 175/609] drbd: remove now unused connector related files Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_nl.h | 172 --------------------------------- include/linux/drbd_tag_magic.h | 84 ---------------- 2 files changed, 256 deletions(-) delete mode 100644 include/linux/drbd_nl.h delete mode 100644 include/linux/drbd_tag_magic.h diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h deleted file mode 100644 index 1216c7a432c..00000000000 --- a/include/linux/drbd_nl.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - PAKET( name, - TYPE ( pn, pr, member ) - ... - ) - - You may never reissue one of the pn arguments -*/ - -#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64) -#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined" -#endif - -NL_PACKET(primary, 1, - NL_BIT( 1, T_MAY_IGNORE, primary_force) -) - -NL_PACKET(secondary, 2, ) - -NL_PACKET(disk_conf, 3, - NL_INT64( 2, T_MAY_IGNORE, disk_size) - NL_STRING( 3, T_MANDATORY, backing_dev, 128) - NL_STRING( 4, T_MANDATORY, meta_dev, 128) - NL_INTEGER( 5, T_MANDATORY, meta_dev_idx) - NL_INTEGER( 6, T_MAY_IGNORE, on_io_error) - NL_INTEGER( 7, T_MAY_IGNORE, fencing) - NL_BIT( 37, T_MAY_IGNORE, use_bmbv) - NL_BIT( 53, T_MAY_IGNORE, no_disk_flush) - NL_BIT( 54, T_MAY_IGNORE, no_md_flush) - /* 55 max_bio_size was available in 8.2.6rc2 */ - NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) - NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) - NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) -) - -NL_PACKET(detach, 4, ) - -NL_PACKET(net_conf, 5, - NL_STRING( 8, T_MANDATORY, my_addr, 128) - NL_STRING( 9, T_MANDATORY, 
peer_addr, 128) - NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX) - NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX) - NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX) - NL_INTEGER( 14, T_MAY_IGNORE, timeout) - NL_INTEGER( 15, T_MANDATORY, wire_protocol) - NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int) - NL_INTEGER( 17, T_MAY_IGNORE, ping_int) - NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size) - NL_INTEGER( 19, T_MAY_IGNORE, max_buffers) - NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark) - NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size) - NL_INTEGER( 22, T_MAY_IGNORE, ko_count) - NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p) - NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p) - NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p) - NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) - NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) - NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) - NL_INTEGER( 81, T_MAY_IGNORE, on_congestion) - NL_INTEGER( 82, T_MAY_IGNORE, cong_fill) - NL_INTEGER( 83, T_MAY_IGNORE, cong_extents) - /* 59 addr_family was available in GIT, never released */ - NL_BIT( 60, T_MANDATORY, mind_af) - NL_BIT( 27, T_MAY_IGNORE, want_lose) - NL_BIT( 28, T_MAY_IGNORE, two_primaries) - NL_BIT( 41, T_MAY_IGNORE, always_asbp) - NL_BIT( 61, T_MAY_IGNORE, no_cork) - NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) - NL_BIT( 70, T_MANDATORY, dry_run) -) - -NL_PACKET(disconnect, 6, - NL_BIT( 84, T_MAY_IGNORE, force) -) - -NL_PACKET(resize, 7, - NL_INT64( 29, T_MAY_IGNORE, resize_size) - NL_BIT( 68, T_MAY_IGNORE, resize_force) - NL_BIT( 69, T_MANDATORY, no_resync) -) - -NL_PACKET(syncer_conf, 8, - NL_INTEGER( 30, T_MAY_IGNORE, rate) - NL_INTEGER( 31, T_MAY_IGNORE, after) - NL_INTEGER( 32, T_MAY_IGNORE, al_extents) -/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume) - * NL_INTEGER( 72, T_MAY_IGNORE, dp_interval) - * NL_INTEGER( 73, T_MAY_IGNORE, throttle_th) - * NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th) - * feature will be reimplemented differently with 8.3.9 */ - NL_STRING( 52, 
T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) - NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) - NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) - NL_BIT( 65, T_MAY_IGNORE, use_rle) - NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) - NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead) - NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) - NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) - NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) - NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate) -) - -NL_PACKET(invalidate, 9, ) -NL_PACKET(invalidate_peer, 10, ) -NL_PACKET(pause_sync, 11, ) -NL_PACKET(resume_sync, 12, ) -NL_PACKET(suspend_io, 13, ) -NL_PACKET(resume_io, 14, ) -NL_PACKET(outdate, 15, ) -NL_PACKET(get_config, 16, ) -NL_PACKET(get_state, 17, - NL_INTEGER( 33, T_MAY_IGNORE, state_i) -) - -NL_PACKET(get_uuids, 18, - NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64))) - NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags) -) - -NL_PACKET(get_timeout_flag, 19, - NL_BIT( 36, T_MAY_IGNORE, use_degraded) -) - -NL_PACKET(call_helper, 20, - NL_STRING( 38, T_MAY_IGNORE, helper, 32) -) - -/* Tag nr 42 already allocated in drbd-8.1 development. 
*/ - -NL_PACKET(sync_progress, 23, - NL_INTEGER( 43, T_MAY_IGNORE, sync_progress) -) - -NL_PACKET(dump_ee, 24, - NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32) - NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX) - NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX) - NL_INT64( 48, T_MAY_IGNORE, ee_sector) - NL_INT64( 49, T_MAY_IGNORE, ee_block_id) - NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10) -) - -NL_PACKET(start_ov, 25, - NL_INT64( 66, T_MAY_IGNORE, start_sector) -) - -NL_PACKET(new_c_uuid, 26, - NL_BIT( 63, T_MANDATORY, clear_bm) -) - -#ifdef NL_RESPONSE -NL_RESPONSE(return_code_only, 27) -#endif - -NL_PACKET(new_connection, 28, /* CHT_CTOR */ - NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) -) - -NL_PACKET(new_minor, 29, /* CHT_CONN */ - NL_INTEGER( 86, T_MANDATORY, minor) - NL_INTEGER( 87, T_MANDATORY, vol_nr) -) - -NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ -NL_PACKET(del_connection, 31, ) /* CHT_CONN */ - -#undef NL_PACKET -#undef NL_INTEGER -#undef NL_INT64 -#undef NL_BIT -#undef NL_STRING -#undef NL_RESPONSE diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h deleted file mode 100644 index 06954319051..00000000000 --- a/include/linux/drbd_tag_magic.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef DRBD_TAG_MAGIC_H -#define DRBD_TAG_MAGIC_H - -#define TT_END 0 -#define TT_REMOVED 0xE000 - -/* declare packet_type enums */ -enum packet_types { -#define NL_PACKET(name, number, fields) P_ ## name = number, -#define NL_RESPONSE(name, number) P_ ## name = number, -#define NL_INTEGER(pn, pr, member) -#define NL_INT64(pn, pr, member) -#define NL_BIT(pn, pr, member) -#define NL_STRING(pn, pr, member, len) -#include "drbd_nl.h" - P_nl_after_last_packet, -}; - -/* These struct are used to deduce the size of the tag lists: */ -#define NL_PACKET(name, number, fields) \ - struct name ## _tag_len_struct { fields }; -#define NL_INTEGER(pn, pr, member) \ - int member; int tag_and_len ## member; -#define NL_INT64(pn, pr, 
member) \ - __u64 member; int tag_and_len ## member; -#define NL_BIT(pn, pr, member) \ - unsigned char member:1; int tag_and_len ## member; -#define NL_STRING(pn, pr, member, len) \ - unsigned char member[len]; int member ## _len; \ - int tag_and_len ## member; -#include "linux/drbd_nl.h" - -/* declare tag-list-sizes */ -static const int tag_list_sizes[] = { -#define NL_PACKET(name, number, fields) 2 fields , -#define NL_INTEGER(pn, pr, member) + 4 + 4 -#define NL_INT64(pn, pr, member) + 4 + 8 -#define NL_BIT(pn, pr, member) + 4 + 1 -#define NL_STRING(pn, pr, member, len) + 4 + (len) -#include "drbd_nl.h" -}; - -/* The two highest bits are used for the tag type */ -#define TT_MASK 0xC000 -#define TT_INTEGER 0x0000 -#define TT_INT64 0x4000 -#define TT_BIT 0x8000 -#define TT_STRING 0xC000 -/* The next bit indicates if processing of the tag is mandatory */ -#define T_MANDATORY 0x2000 -#define T_MAY_IGNORE 0x0000 -#define TN_MASK 0x1fff -/* The remaining 13 bits are used to enumerate the tags */ - -#define tag_type(T) ((T) & TT_MASK) -#define tag_number(T) ((T) & TN_MASK) - -/* declare tag enums */ -#define NL_PACKET(name, number, fields) fields -enum drbd_tags { -#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr , -#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr , -#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr , -#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr , -#include "drbd_nl.h" -}; - -struct tag { - const char *name; - int type_n_flags; - int max_len; -}; - -/* declare tag names */ -#define NL_PACKET(name, number, fields) fields -static const struct tag tag_descriptions[] = { -#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) }, -#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) }, -#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) }, -#define NL_STRING(pn, pr, member, len) [ pn ] = { 
#member, TT_STRING | pr, (len) }, -#include "drbd_nl.h" -}; - -#endif From 543cc10b4cc5c60aa9fcc62705ccfb9998bf4697 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 10 Mar 2011 22:18:18 +0100 Subject: [PATCH 176/609] drbd: drbd_adm_get_status needs to show some more detail We want to see existing connection objects, even if they do not currently have volumes attached. Change the .dumpit variant of drbd_adm_get_status to iterate not over minor devices, but over connections + volumes. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 +- drivers/block/drbd/drbd_main.c | 15 +++-- drivers/block/drbd/drbd_nl.c | 115 +++++++++++++++++++++++++++------ 3 files changed, 106 insertions(+), 27 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ea638ce48e8..5fa7ba0b17d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -171,6 +171,7 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { extern struct ratelimit_state drbd_ratelimit_state; extern struct idr minors; extern struct list_head drbd_tconns; +extern struct mutex drbd_cfg_mutex; /* on the wire */ enum drbd_packet { @@ -918,7 +919,7 @@ enum { struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ - struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ + struct list_head all_tconn; /* linked on global drbd_tconns */ struct idr volumes; /* to mdev mapping */ enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ struct mutex cstate_mutex; /* Protects graceful disconnects */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 24c712b91fb..113c7b46538 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -120,6 +120,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 */ struct idr minors; struct list_head 
drbd_tconns; /* list of struct drbd_tconn */ +DEFINE_MUTEX(drbd_cfg_mutex); struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* peer requests */ @@ -2238,14 +2239,14 @@ struct drbd_tconn *conn_by_name(const char *name) if (!name || !name[0]) return NULL; - write_lock_irq(&global_state_lock); + mutex_lock(&drbd_cfg_mutex); list_for_each_entry(tconn, &drbd_tconns, all_tconn) { if (!strcmp(tconn->name, name)) goto found; } tconn = NULL; found: - write_unlock_irq(&global_state_lock); + mutex_unlock(&drbd_cfg_mutex); return tconn; } @@ -2285,9 +2286,9 @@ struct drbd_tconn *drbd_new_tconn(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); - write_lock_irq(&global_state_lock); - list_add(&tconn->all_tconn, &drbd_tconns); - write_unlock_irq(&global_state_lock); + mutex_lock(&drbd_cfg_mutex); + list_add_tail(&tconn->all_tconn, &drbd_tconns); + mutex_unlock(&drbd_cfg_mutex); return tconn; @@ -2302,9 +2303,9 @@ fail: void drbd_free_tconn(struct drbd_tconn *tconn) { - write_lock_irq(&global_state_lock); + mutex_lock(&drbd_cfg_mutex); list_del(&tconn->all_tconn); - write_unlock_irq(&global_state_lock); + mutex_unlock(&drbd_cfg_mutex); idr_destroy(&tconn->volumes); free_cpumask_var(tconn->cpu_mask); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f54d512ffce..244085ac12c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1544,6 +1544,10 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) new_my_addr = (struct sockaddr *)&new_conf->my_addr; new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; + + /* No need to take drbd_cfg_mutex here. All reconfiguration is + * strictly serialized on genl_lock(). 
We are protected against + * concurrent reconfiguration/addition/deletion */ list_for_each_entry(oconn, &drbd_tconns, all_tconn) { if (oconn == tconn) continue; @@ -2187,6 +2191,24 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } +int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr) +{ + struct nlattr *nla; + nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); + if (!nla) + goto nla_put_failure; + if (vnr != VOLUME_UNSPECIFIED) + NLA_PUT_U32(skb, T_ctx_volume, vnr); + NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name); + nla_nest_end(skb, nla); + return 0; + +nla_put_failure: + if (nla) + nla_nest_cancel(skb, nla); + return -EMSGSIZE; +} + int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, const struct sib_info *sib) { @@ -2215,12 +2237,8 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, /* We need to add connection name and volume number information still. * Minor number is in drbd_genlmsghdr. */ - nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); - if (!nla) + if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) goto nla_put_failure; - NLA_PUT_U32(skb, T_ctx_volume, mdev->vnr); - NLA_PUT_STRING(skb, T_ctx_conn_name, mdev->tconn->name); - nla_nest_end(skb, nla); if (got_ldev) if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) @@ -2307,41 +2325,100 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { struct drbd_conf *mdev; struct drbd_genlmsghdr *dh; - int minor = cb->args[0]; + struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0]; + struct drbd_tconn *tconn = NULL; + struct drbd_tconn *tmp; + unsigned volume = cb->args[1]; - /* Open coded deferred single idr_for_each_entry iteration. + /* Open coded, deferred, iteration: + * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { + * idr_for_each_entry(&tconn->volumes, mdev, i) { + * ... 
+ * } + * } + * where tconn is cb->args[0]; + * and i is cb->args[1]; + * * This may miss entries inserted after this dump started, * or entries deleted before they are reached. - * But we need to make sure the mdev won't disappear while - * we are looking at it. */ + * + * We need to make sure the mdev won't disappear while + * we are looking at it, and revalidate our iterators + * on each iteration. + */ + /* synchronize with drbd_new_tconn/drbd_free_tconn */ + mutex_lock(&drbd_cfg_mutex); + /* synchronize with drbd_delete_device */ rcu_read_lock(); - mdev = idr_get_next(&minors, &minor); - if (mdev) { +next_tconn: + /* revalidate iterator position */ + list_for_each_entry(tmp, &drbd_tconns, all_tconn) { + if (pos == NULL) { + /* first iteration */ + pos = tmp; + tconn = pos; + break; + } + if (tmp == pos) { + tconn = pos; + break; + } + } + if (tconn) { + mdev = idr_get_next(&tconn->volumes, &volume); + if (!mdev) { + /* No more volumes to dump on this tconn. + * Advance tconn iterator. */ + pos = list_entry(tconn->all_tconn.next, + struct drbd_tconn, all_tconn); + /* But, did we dump any volume on this tconn yet? 
*/ + if (volume != 0) { + tconn = NULL; + volume = 0; + goto next_tconn; + } + } + dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, &drbd_genl_family, NLM_F_MULTI, DRBD_ADM_GET_STATUS); if (!dh) - goto errout; + goto out; - D_ASSERT(mdev->minor == minor); + if (!mdev) { + /* this is a tconn without a single volume */ + dh->minor = -1U; + dh->ret_code = NO_ERROR; + if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED)) + genlmsg_cancel(skb, dh); + else + genlmsg_end(skb, dh); + goto out; + } - dh->minor = minor; + D_ASSERT(mdev->vnr == volume); + D_ASSERT(mdev->tconn == tconn); + + dh->minor = mdev_to_minor(mdev); dh->ret_code = NO_ERROR; if (nla_put_status_info(skb, mdev, NULL)) { genlmsg_cancel(skb, dh); - goto errout; + goto out; } genlmsg_end(skb, dh); } -errout: +out: rcu_read_unlock(); - /* where to start idr_get_next with the next iteration */ - cb->args[0] = minor+1; + mutex_unlock(&drbd_cfg_mutex); + /* where to start the next iteration */ + cb->args[0] = (long)pos; + cb->args[1] = (pos == tconn) ? volume + 1 : 0; - /* No more minors found: empty skb. Which will terminate the dump. */ + /* No more tconns/volumes/minors found results in an empty skb. + * Which will terminate the dump. */ return skb->len; } From d0456c72df5fb3b800ba9b0ec2465fdbcaf29a7f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 10 Mar 2011 23:28:13 +0100 Subject: [PATCH 177/609] drbd: simplify conn_all_vols_unconf, make it bool Get rid of a temporary variable and, funny bitand assignment. Just short circuit, returning false, once we encounter the first still configured volume. FIXME verify call sites for need of rcu_read_lock or stronger. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 16 +++++++--------- drivers/block/drbd/drbd_state.h | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a280bc238ac..c27d3778cae 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -47,20 +47,18 @@ static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_st static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, const char **warn_sync_abort); -int conn_all_vols_unconf(struct drbd_tconn *tconn) +bool conn_all_vols_unconf(struct drbd_tconn *tconn) { struct drbd_conf *mdev; - int minor, uncfg = 1; + int minor; idr_for_each_entry(&tconn->volumes, mdev, minor) { - uncfg &= (mdev->state.disk == D_DISKLESS && - mdev->state.conn == C_STANDALONE && - mdev->state.role == R_SECONDARY); - if (!uncfg) - break; + if (mdev->state.disk != D_DISKLESS || + mdev->state.conn != C_STANDALONE || + mdev->state.role != R_SECONDARY) + return false; } - - return uncfg; + return true; } /** diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index d9536cd798e..55df0728bc8 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -91,7 +91,7 @@ conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_s enum chg_state_flags flags); extern void drbd_resume_al(struct drbd_conf *mdev); -extern int conn_all_vols_unconf(struct drbd_tconn *tconn); +extern bool conn_all_vols_unconf(struct drbd_tconn *tconn); /** * drbd_request_state() - Reqest a state change From cffec5b2fe8e851f54bb67a9012add062a97b960 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 10 Mar 2011 23:33:15 +0100 Subject: [PATCH 178/609] drbd: Allow a Diskless Secondary volume to be removed Even if the connection is still established. 
We should be able to reduce a volume from a replication group, without taking the whole group offline. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 244085ac12c..9b0312f2d2f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -220,6 +220,8 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, drbd_msg_put_info("over-determined configuration context mismatch"); return ERR_INVALID_REQUEST; } + if (adm_ctx.mdev && !adm_ctx.tconn) + adm_ctx.tconn = adm_ctx.mdev->tconn; return NO_ERROR; fail: @@ -2643,10 +2645,15 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; if (mdev->state.disk == D_DISKLESS && - mdev->state.conn == C_STANDALONE && + /* no need to be mdev->state.conn == C_STANDALONE && + * we may want to delete a minor from a live replication group. + */ mdev->state.role == R_SECONDARY) { drbd_delete_device(mdev_to_minor(mdev)); retcode = NO_ERROR; + /* if this was the last volume of this connection, + * this will terminate all threads */ + conn_reconfig_done(adm_ctx.tconn); } else retcode = ERR_MINOR_CONFIGURED; out: From 38f19616d2c6d4e0ab2de01e4c2f159cd5aa8b49 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 14 Mar 2011 13:22:35 +0100 Subject: [PATCH 179/609] drbd: new-connection and new-minor succeed, if the object already exists Follow O_CREAT semantics when creating connection or minor device/volume objects. If we need O_CREAT|O_EXCL semantics some time down the road, we can add NLM_F_EXCL to the netlink message flags. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9b0312f2d2f..0d3753be371 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2590,8 +2590,11 @@ int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) goto out; if (adm_ctx.tconn) { - retcode = ERR_INVALID_REQUEST; - drbd_msg_put_info("connection exists"); + if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { + retcode = ERR_INVALID_REQUEST; + drbd_msg_put_info("connection exists"); + } + /* else: still NO_ERROR */ goto out; } @@ -2626,6 +2629,15 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) goto out; } + /* drbd_adm_prepare made sure already + * that mdev->tconn and mdev->vnr match the request. */ + if (adm_ctx.mdev) { + if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) + retcode = ERR_MINOR_EXISTS; + /* else: still NO_ERROR */ + goto out; + } + retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); out: drbd_adm_finish(info, retcode); From 527f4b24e51194a79f89ce5ce523236be31c511d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 14 Mar 2011 13:58:03 +0100 Subject: [PATCH 180/609] drbd: bail out if a config request is over-determined, and not matching We have resources resp. connections, volumes, and minor numbers. A config request may specify all three of them. If it turns out that the minor belongs to a different connection, or a different volume number in the same connection, that configuration request is invalid.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0d3753be371..f965dfe4b5f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -204,20 +204,20 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, } /* some more paranoia, if the request was over-determined */ + if (adm_ctx.mdev && adm_ctx.tconn && + adm_ctx.mdev->tconn != adm_ctx.tconn) { + pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", + adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("minor exists in different connection"); + return ERR_INVALID_REQUEST; + } if (adm_ctx.mdev && adm_ctx.volume != VOLUME_UNSPECIFIED && adm_ctx.volume != adm_ctx.mdev->vnr) { pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", adm_ctx.minor, adm_ctx.volume, adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name); - drbd_msg_put_info("over-determined configuration context mismatch"); - return ERR_INVALID_REQUEST; - } - if (adm_ctx.mdev && adm_ctx.tconn && - adm_ctx.mdev->tconn != adm_ctx.tconn) { - pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", - adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); - drbd_msg_put_info("over-determined configuration context mismatch"); + drbd_msg_put_info("minor exists as different volume"); return ERR_INVALID_REQUEST; } if (adm_ctx.mdev && !adm_ctx.tconn) From 3c5e5f6afd242ea5944197a9b54033c1461b793c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 15 Mar 2011 16:04:09 +0100 Subject: [PATCH 181/609] drbd: add forgotten spin_unlock somehow a "goto abort" was introduced with commit drbd: Extracted is_valid_transition() out of sanitize_state() which left drbd_req_state still holding the spin lock. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c27d3778cae..0db6c4316f7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -180,8 +180,10 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, os = mdev->state; ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); - if (rv < SS_SUCCESS) + if (rv < SS_SUCCESS) { + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); goto abort; + } if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); From 85f75dd7630436b0aa46a6393099c0f23121f5f0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 15 Mar 2011 16:26:37 +0100 Subject: [PATCH 182/609] drbd: introduce in-kernel "down" command This greatly simplifies deconfiguration of whole resources. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 - drivers/block/drbd/drbd_nl.c | 203 ++++++++++++++++++++++++--------- include/linux/drbd_genl.h | 2 + 3 files changed, 154 insertions(+), 53 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 113c7b46538..40b7b93def7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2303,9 +2303,7 @@ fail: void drbd_free_tconn(struct drbd_tconn *tconn) { - mutex_lock(&drbd_cfg_mutex); list_del(&tconn->all_tconn); - mutex_unlock(&drbd_cfg_mutex); idr_destroy(&tconn->volumes); free_cpumask_var(tconn->cpu_mask); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f965dfe4b5f..d952e877f8d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -49,6 +49,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); int drbd_adm_create_connection(struct sk_buff *skb, struct 
genl_info *info); int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); @@ -1416,6 +1417,18 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } +static int adm_detach(struct drbd_conf *mdev) +{ + enum drbd_ret_code retcode; + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ + retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); + wait_event(mdev->misc_wait, + mdev->state.disk != D_DISKLESS || + !atomic_read(&mdev->local_cnt)); + drbd_resume_io(mdev); + return retcode; +} + /* Detaching the disk is a process in multiple stages. First we need to lock * out application IO, in-flight IO, IO stuck in drbd_al_begin_io. * Then we transition to D_DISKLESS, and wait for put_ldev() to return all @@ -1423,7 +1436,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * Only then we have finally detached. 
*/ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -1432,13 +1444,7 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; - drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - wait_event(mdev->misc_wait, - mdev->state.disk != D_DISKLESS || - !atomic_read(&mdev->local_cnt)); - drbd_resume_io(mdev); + retcode = adm_detach(adm_ctx.mdev); out: drbd_adm_finish(info, retcode); return 0; @@ -1680,10 +1686,49 @@ out: return 0; } +static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force) +{ + enum drbd_state_rv rv; + if (force) { + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate >= C_WF_CONNECTION) + _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + spin_unlock_irq(&tconn->req_lock); + return SS_SUCCESS; + } + + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); + + switch (rv) { + case SS_NOTHING_TO_DO: + case SS_ALREADY_STANDALONE: + return SS_SUCCESS; + case SS_PRIMARY_NOP: + /* Our state checking code wants to see the peer outdated. */ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED), CS_VERBOSE); + break; + case SS_CW_FAILED_BY_PEER: + /* The peer probably wants to see us outdated. 
*/ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), 0); + if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + rv = SS_SUCCESS; + } + break; + default:; + /* no special handling necessary */ + } + + return rv; +} + int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) { struct disconnect_parms parms; struct drbd_tconn *tconn; + enum drbd_state_rv rv; enum drbd_ret_code retcode; int err; @@ -1704,35 +1749,8 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) } } - if (parms.force_disconnect) { - spin_lock_irq(&tconn->req_lock); - if (tconn->cstate >= C_WF_CONNECTION) - _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - spin_unlock_irq(&tconn->req_lock); - goto done; - } - - retcode = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); - - if (retcode == SS_NOTHING_TO_DO) - goto done; - else if (retcode == SS_ALREADY_STANDALONE) - goto done; - else if (retcode == SS_PRIMARY_NOP) { - /* Our state checking code wants to see the peer outdated. */ - retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED), CS_VERBOSE); - } else if (retcode == SS_CW_FAILED_BY_PEER) { - /* The peer probably wants to see us outdated. 
*/ - retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - disk, D_OUTDATED), 0); - if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - retcode = SS_SUCCESS; - } - } - - if (retcode < SS_SUCCESS) + rv = conn_try_disconnect(tconn, parms.force_disconnect); + if (rv < SS_SUCCESS) goto fail; if (wait_event_interruptible(tconn->ping_wait, @@ -1743,7 +1761,6 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) goto fail; } - done: retcode = NO_ERROR; fail: drbd_adm_finish(info, retcode); @@ -2644,9 +2661,21 @@ out: return 0; } +static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) +{ + if (mdev->state.disk == D_DISKLESS && + /* no need to be mdev->state.conn == C_STANDALONE && + * we may want to delete a minor from a live replication group. + */ + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + return NO_ERROR; + } else + return ERR_MINOR_CONFIGURED; +} + int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -2655,19 +2684,89 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; - if (mdev->state.disk == D_DISKLESS && - /* no need to be mdev->state.conn == C_STANDALONE && - * we may want to delete a minor from a live replication group. 
- */ - mdev->state.role == R_SECONDARY) { - drbd_delete_device(mdev_to_minor(mdev)); - retcode = NO_ERROR; - /* if this was the last volume of this connection, - * this will terminate all threads */ + mutex_lock(&drbd_cfg_mutex); + retcode = adm_delete_minor(adm_ctx.mdev); + mutex_unlock(&drbd_cfg_mutex); + /* if this was the last volume of this connection, + * this will terminate all threads */ + if (retcode == NO_ERROR) conn_reconfig_done(adm_ctx.tconn); - } else - retcode = ERR_MINOR_CONFIGURED; +out: + drbd_adm_finish(info, retcode); + return 0; +} + +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + enum drbd_state_rv rv; + struct drbd_conf *mdev; + unsigned i; + + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (!adm_ctx.tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto out; + } + + mutex_lock(&drbd_cfg_mutex); + /* demote */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = drbd_set_role(mdev, R_SECONDARY, 0); + if (retcode < SS_SUCCESS) { + drbd_msg_put_info("failed to demote"); + goto out_unlock; + } + } + + /* disconnect */ + rv = conn_try_disconnect(adm_ctx.tconn, 0); + if (rv < SS_SUCCESS) { + retcode = rv; /* enum type mismatch! */ + drbd_msg_put_info("failed to disconnect"); + goto out_unlock; + } + + /* detach */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + rv = adm_detach(mdev); + if (rv < SS_SUCCESS) { + retcode = rv; /* enum type mismatch! 
*/ + drbd_msg_put_info("failed to detach"); + goto out_unlock; + } + } + + /* delete volumes */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = adm_delete_minor(mdev); + if (retcode != NO_ERROR) { + /* "can not happen" */ + drbd_msg_put_info("failed to delete volume"); + goto out_unlock; + } + } + + /* stop all threads */ + conn_reconfig_done(adm_ctx.tconn); + + /* delete connection */ + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + drbd_free_tconn(adm_ctx.tconn); + retcode = NO_ERROR; + } else { + /* "can not happen" */ + retcode = ERR_CONN_IN_USE; + drbd_msg_put_info("failed to delete connection"); + goto out_unlock; + } +out_unlock: + mutex_unlock(&drbd_cfg_mutex); out: drbd_adm_finish(info, retcode); return 0; @@ -2683,12 +2782,14 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; + mutex_lock(&drbd_cfg_mutex); if (conn_lowest_minor(adm_ctx.tconn) < 0) { drbd_free_tconn(adm_ctx.tconn); retcode = NO_ERROR; } else { retcode = ERR_CONN_IN_USE; } + mutex_unlock(&drbd_cfg_mutex); out: drbd_adm_finish(info, retcode); diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 84e16848f7a..a07d69279b1 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -347,3 +347,5 @@ GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) From 2de876efa68d59fa4339837c4a94b42015ab1836 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 14:38:01 +0100 Subject: [PATCH 183/609] drbd: Ignore packets for non existing volumes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 33 
++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1aace37c516..57691a3b8f3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3794,27 +3794,40 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, return ok; } -static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size) { /* TODO zero copy sink :) */ static char sink[128]; int size, want, r; - dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", - cmd, data_size); - size = data_size; while (size > 0) { want = min_t(int, size, sizeof(sink)); - r = drbd_recv(mdev->tconn, sink, want); - if (!expect(r > 0)) + r = drbd_recv(tconn, sink, want); + if (r <= 0) break; size -= r; } return size == 0; } +static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) +{ + dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", + cmd, data_size); + + return _tconn_receive_skip(mdev->tconn, data_size); +} + +static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size) +{ + conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n", + cmd, data_size); + + return _tconn_receive_skip(tconn, data_size); +} + static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { @@ -3890,6 +3903,7 @@ static struct data_cmd drbd_cmd_handler[] = { static void drbdd(struct drbd_tconn *tconn) { struct p_header *header = &tconn->data.rbuf.header; + struct drbd_conf *mdev; struct packet_info pi; size_t shs; /* sub header size */ int rv; @@ -3920,7 +3934,10 @@ static void drbdd(struct drbd_tconn *tconn) } } - rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - 
shs); + mdev = vnr_to_mdev(tconn, pi.vnr); + rv = mdev ? + drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs) : + tconn_receive_skip(tconn, pi.cmd, pi.size - shs); if (unlikely(!rv)) { conn_err(tconn, "error receiving %s, l: %d!\n", From 082a3439a2d34c465c2f5bc10a324fb4d96b2332 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 16:05:42 +0100 Subject: [PATCH 184/609] drbd: process_done_ee() has to handle unconfigured devices now Took the chance and converted tconn_process_done_ee() to use idr_for_each_entry() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 44 ++++++++++++------------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 57691a3b8f3..3c739ab4de5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -404,7 +404,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) LIST_HEAD(work_list); LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; - int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); + int ok = 1; spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); @@ -4667,37 +4667,27 @@ static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -static int _drbd_process_done_ee(int vnr, void *p, void *data) -{ - struct drbd_conf *mdev = (struct drbd_conf *)p; - return !drbd_process_done_ee(mdev); -} - -static int _check_ee_empty(int vnr, void *p, void *data) -{ - struct drbd_conf *mdev = (struct drbd_conf *)p; - struct drbd_tconn *tconn = mdev->tconn; - int not_empty; - - spin_lock_irq(&tconn->req_lock); - not_empty = !list_empty(&mdev->done_ee); - spin_unlock_irq(&tconn->req_lock); - - return not_empty; -} - static int tconn_process_done_ee(struct drbd_tconn *tconn) { - int not_empty, err; + struct drbd_conf *mdev; + int i, not_empty = 0; do { clear_bit(SIGNAL_ASENDER, &tconn->flags); 
flush_signals(current); - err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL); - if (err) - return err; + idr_for_each_entry(&tconn->volumes, mdev, i) { + if (!drbd_process_done_ee(mdev)) + return 1; /* error */ + } set_bit(SIGNAL_ASENDER, &tconn->flags); - not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL); + + spin_lock_irq(&tconn->req_lock); + idr_for_each_entry(&tconn->volumes, mdev, i) { + not_empty = !list_empty(&mdev->done_ee); + if (not_empty) + break; + } + spin_unlock_irq(&tconn->req_lock); } while (not_empty); return 0; @@ -4759,8 +4749,10 @@ int drbd_asender(struct drbd_thread *thi) much to send */ if (!tconn->net_conf->no_cork) drbd_tcp_cork(tconn->meta.socket); - if (tconn_process_done_ee(tconn)) + if (tconn_process_done_ee(tconn)) { + conn_err(tconn, "tconn_process_done_ee() failed\n"); goto reconnect; + } /* but unconditionally uncork unless disabled */ if (!tconn->net_conf->no_cork) drbd_tcp_uncork(tconn->meta.socket); From 2325eb661f745ed543ef33642f5fe0dd6d1f0850 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 16:56:18 +0100 Subject: [PATCH 185/609] drbd: New minors have to inherit the connection state from their connection Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 5 +++++ drivers/block/drbd/drbd_receiver.c | 5 +++-- drivers/block/drbd/drbd_state.c | 4 ++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 5fa7ba0b17d..278e7acc7f3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1595,6 +1595,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); extern void conn_flush_workqueue(struct drbd_tconn *tconn); +extern int
drbd_connected(int vnr, void *p, void *data); static inline void drbd_flush_workqueue(struct drbd_conf *mdev) { conn_flush_workqueue(mdev->tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 40b7b93def7..79a0e042252 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2410,6 +2410,11 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, } add_disk(disk); + /* inherit the connection state */ + mdev->state.conn = tconn->cstate; + if (mdev->state.conn == C_WF_REPORT_PARAMS) + drbd_connected(vnr, mdev, tconn); + return NO_ERROR; out_idr_remove_vol: diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3c739ab4de5..54f45fcf32b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -744,8 +744,9 @@ static int drbd_socket_okay(struct socket **sock) return false; } } - -static int drbd_connected(int vnr, void *p, void *data) +/* Gets called if a connection is established, or if a new minor gets created + in a connection */ +int drbd_connected(int vnr, void *p, void *data) { struct drbd_conf *mdev = (struct drbd_conf *)p; int ok = 1; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0db6c4316f7..886b996ec7b 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -432,8 +432,8 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) rv = SS_IN_TRANSIENT_STATE; - if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) - rv = SS_IN_TRANSIENT_STATE; + /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) + rv = SS_IN_TRANSIENT_STATE; */ if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) rv = SS_NEED_CONNECTION; From 34f646bd5731cfd456fbacc95c12a2cf52275e54 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 17:27:47 
+0100 Subject: [PATCH 186/609] drbd: Allow two diskless minors to be connected In the context of drbd-8.4 it no longer makes sense to disallow that. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 54f45fcf32b..dfa092087d7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3130,12 +3130,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); - if (p_size == 0 && mdev->state.disk == D_DISKLESS) { - dev_err(DEV, "some backing storage is needed\n"); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return false; - } - /* just store the peer's disk size for now. * we still need to figure out whether we accept that. */ mdev->p_size = p_size; From 38fa9988fa838324a0cce6e2f9d3c674230659d5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 18:24:49 +0100 Subject: [PATCH 187/609] drbd: Do not modify the connection state with anything other than conn_request_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_receiver.c | 16 ++++++++-------- drivers/block/drbd/drbd_worker.c | 10 ++-------- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c1a90616776..89f1a6904a4 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -769,7 +769,7 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, dump_stack(); lc_put(mdev->resync, &ext->lce); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return; } } else { diff --git
a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dfa092087d7..ac2a25f6b25 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2922,7 +2922,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, return true; disconnect: - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return false; } @@ -3101,7 +3101,7 @@ disconnect: crypto_free_hash(csums_tfm); /* but free the verify_tfm again, if csums_tfm did not work out */ crypto_free_hash(verify_tfm); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return false; } @@ -3161,7 +3161,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, mdev->state.disk >= D_OUTDATED && mdev->state.conn < C_CONNECTED) { dev_err(DEV, "The peer's disk size is too small!\n"); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); mdev->ldev->dc.disk_size = my_usize; put_ldev(mdev); return false; @@ -3237,7 +3237,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return false; } @@ -3442,7 +3442,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) return false; D_ASSERT(os.conn == C_WF_REPORT_PARAMS); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return false; } } @@ -3467,7 +3467,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet 
cmd, tl_clear(mdev->tconn); drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); - drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); + conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); return false; } rv = _drbd_set_state(mdev, ns, cs_flags, NULL); @@ -3475,7 +3475,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) { - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return false; } @@ -3648,7 +3648,7 @@ decode_bitmap_c(struct drbd_conf *mdev, * during all our tests. */ dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); return -EIO; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e459cb2076b..a730520e468 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1290,12 +1290,6 @@ int w_send_read_req(struct drbd_work *w, int cancel) ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); - if (!ok) { - /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send(); - * so this is probably redundant */ - if (mdev->state.conn >= C_CONNECTED) - drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); - } req_mod(req, ok ? 
HANDED_OVER_TO_NETWORK : SEND_FAILED); return ok; @@ -1510,7 +1504,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) if (r > 0) { dev_info(DEV, "before-resync-target handler returned %d, " "dropping connection.\n", r); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return; } } else /* C_SYNC_SOURCE */ { @@ -1523,7 +1517,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } else { dev_info(DEV, "before-resync-source handler returned %d, " "dropping connection.\n", r); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); return; } } From 8169e41b3e59ee74a31270804a1b5ccef9ae9fce Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 18:40:27 +0100 Subject: [PATCH 188/609] drbd: Moved CONN_DRY_RUN to the per connection (tconn) flags Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 278e7acc7f3..d6e7e657e7a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -791,7 +791,6 @@ enum { RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ - CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ NEW_CUR_UUID, /* Create new current UUID when thawing IO */ AL_SUSPENDED, /* Activity logging is currently suspended. */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ @@ -915,6 +914,7 @@ enum { * but worker thread is still handling the cleanup. * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, * while this is set. 
*/ + CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ac2a25f6b25..4bac2f56ca2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2800,7 +2800,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { + if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else @@ -2869,10 +2869,10 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, cf = be32_to_cpu(p->conn_flags); p_want_lose = cf & CF_WANT_LOSE; - clear_bit(CONN_DRY_RUN, &mdev->flags); + clear_bit(CONN_DRY_RUN, &mdev->tconn->flags); if (cf & CF_DRY_RUN) - set_bit(CONN_DRY_RUN, &mdev->flags); + set_bit(CONN_DRY_RUN, &mdev->tconn->flags); if (p_proto != mdev->tconn->net_conf->wire_protocol) { dev_err(DEV, "incompatible communication protocols\n"); @@ -3439,7 +3439,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, peer_state.disk = D_DISKLESS; real_peer_disk = D_DISKLESS; } else { - if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) + if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags)) return false; D_ASSERT(os.conn == C_WF_REPORT_PARAMS); conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); From d9ae84e7909cddccaea617acaeb0737979707fdd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 18:50:22 +0100 Subject: [PATCH 189/609] drbd: Allow packet handler functions that take a connection That is necessary in case a connection does not have a volume 0 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 75 +++++++++++++++++------------- 1 
file changed, 42 insertions(+), 33 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4bac2f56ca2..58727e98711 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3853,39 +3853,45 @@ static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, return true; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd, - unsigned int to_receive); - struct data_cmd { int expect_payload; size_t pkt_size; - drbd_cmd_handler_f function; + enum { + MDEV, + CONN, + } type; + union { + int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd, + unsigned int to_receive); + int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd, + unsigned int to_receive); + }; }; static struct data_cmd drbd_cmd_handler[] = { - [P_DATA] = { 1, sizeof(struct p_data), receive_Data }, - [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, - [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , - [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , - [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , - [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , - [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote }, - [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, - [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, - [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam }, - [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam }, - [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, - [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, - [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, - [P_STATE] = { 0, sizeof(struct p_state), receive_state }, - [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, - [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), 
receive_sync_uuid }, - [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, - [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, - [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, - [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, - [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, - [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, + [P_DATA] = { 1, sizeof(struct p_data), MDEV, { receive_Data } }, + [P_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } }, + [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_RSDataReply } } , + [P_BARRIER] = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } , + [P_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } , + [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } , + [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } }, + [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, + [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, + [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } }, + [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } }, + [P_PROTOCOL] = { 1, sizeof(struct p_protocol), MDEV, { receive_protocol } }, + [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } }, + [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } }, + [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } }, + [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } }, + [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } }, + [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, + [P_OV_REPLY] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, + 
[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, + [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } }, + [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } }, + [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } }, }; /* All handler functions that expect a sub-header get that sub-heder in @@ -3898,7 +3904,6 @@ static struct data_cmd drbd_cmd_handler[] = { static void drbdd(struct drbd_tconn *tconn) { struct p_header *header = &tconn->data.rbuf.header; - struct drbd_conf *mdev; struct packet_info pi; size_t shs; /* sub header size */ int rv; @@ -3909,7 +3914,7 @@ static void drbdd(struct drbd_tconn *tconn) goto err_out; if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || - !drbd_cmd_handler[pi.cmd].function)) { + !drbd_cmd_handler[pi.cmd].mdev_fn)) { conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } @@ -3929,10 +3934,14 @@ static void drbdd(struct drbd_tconn *tconn) } } - mdev = vnr_to_mdev(tconn, pi.vnr); - rv = mdev ? - drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs) : - tconn_receive_skip(tconn, pi.cmd, pi.size - shs); + if (drbd_cmd_handler[pi.cmd].type == CONN) { + rv = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs); + } else { + struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); + rv = mdev ? 
+ drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) : + tconn_receive_skip(tconn, pi.cmd, pi.size - shs); + } if (unlikely(!rv)) { conn_err(tconn, "error receiving %s, l: %d!\n", From 7204624c5e88fdfd33a3badc7989148f6962b523 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Mar 2011 18:51:47 +0100 Subject: [PATCH 190/609] drbd: Converted receive_protocol() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 46 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 58727e98711..4a985d3b67c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2853,10 +2853,10 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, +static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size) { - struct p_protocol *p = &mdev->tconn->data.rbuf.protocol; + struct p_protocol *p = &tconn->data.rbuf.protocol; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; @@ -2869,60 +2869,60 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, cf = be32_to_cpu(p->conn_flags); p_want_lose = cf & CF_WANT_LOSE; - clear_bit(CONN_DRY_RUN, &mdev->tconn->flags); + clear_bit(CONN_DRY_RUN, &tconn->flags); if (cf & CF_DRY_RUN) - set_bit(CONN_DRY_RUN, &mdev->tconn->flags); + set_bit(CONN_DRY_RUN, &tconn->flags); - if (p_proto != mdev->tconn->net_conf->wire_protocol) { - dev_err(DEV, "incompatible communication protocols\n"); + if (p_proto != tconn->net_conf->wire_protocol) { + conn_err(tconn, "incompatible communication protocols\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_0p, 
mdev->tconn->net_conf->after_sb_0p)) { - dev_err(DEV, "incompatible after-sb-0pri settings\n"); + if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) { + conn_err(tconn, "incompatible after-sb-0pri settings\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) { - dev_err(DEV, "incompatible after-sb-1pri settings\n"); + if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) { + conn_err(tconn, "incompatible after-sb-1pri settings\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) { - dev_err(DEV, "incompatible after-sb-2pri settings\n"); + if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) { + conn_err(tconn, "incompatible after-sb-2pri settings\n"); goto disconnect; } - if (p_want_lose && mdev->tconn->net_conf->want_lose) { - dev_err(DEV, "both sides have the 'want_lose' flag set\n"); + if (p_want_lose && tconn->net_conf->want_lose) { + conn_err(tconn, "both sides have the 'want_lose' flag set\n"); goto disconnect; } - if (p_two_primaries != mdev->tconn->net_conf->two_primaries) { - dev_err(DEV, "incompatible setting of the two-primaries options\n"); + if (p_two_primaries != tconn->net_conf->two_primaries) { + conn_err(tconn, "incompatible setting of the two-primaries options\n"); goto disconnect; } - if (mdev->tconn->agreed_pro_version >= 87) { - unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg; + if (tconn->agreed_pro_version >= 87) { + unsigned char *my_alg = tconn->net_conf->integrity_alg; - if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size) + if (drbd_recv(tconn, p_integrity_alg, data_size) != data_size) return false; p_integrity_alg[SHARED_SECRET_MAX-1] = 0; if (strcmp(p_integrity_alg, my_alg)) { - dev_err(DEV, "incompatible setting of the data-integrity-alg\n"); + conn_err(tconn, "incompatible setting of the data-integrity-alg\n"); goto disconnect; } - dev_info(DEV, "data-integrity-alg: %s\n", + conn_info(tconn, 
"data-integrity-alg: %s\n", my_alg[0] ? my_alg : (unsigned char *)""); } return true; disconnect: - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); return false; } @@ -3880,7 +3880,7 @@ static struct data_cmd drbd_cmd_handler[] = { [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } }, [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } }, - [P_PROTOCOL] = { 1, sizeof(struct p_protocol), MDEV, { receive_protocol } }, + [P_PROTOCOL] = { 1, sizeof(struct p_protocol), CONN, { .conn_fn = receive_protocol } }, [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } }, [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } }, [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } }, From dfafcc8a7ba120492ae2a27b6ec774aa3224903b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 10:55:07 +0100 Subject: [PATCH 191/609] drbd: Separate connection state changes from minor dev state changes #1 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 36 +++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4a985d3b67c..704cb708787 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3331,19 +3331,39 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, mask = convert_state(mask); val = convert_state(val); - if (cmd == P_CONN_ST_CHG_REQ) { - rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY); - conn_send_sr_reply(mdev->tconn, rv); - } else { - rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); - drbd_send_sr_reply(mdev, rv); - } + rv = drbd_change_state(mdev, 
CS_VERBOSE, mask, val); + drbd_send_sr_reply(mdev, rv); drbd_md_sync(mdev); return true; } +static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd, + unsigned int data_size) +{ + struct p_req_state *p = &tconn->data.rbuf.req_state; + union drbd_state mask, val; + enum drbd_state_rv rv; + + mask.i = be32_to_cpu(p->mask); + val.i = be32_to_cpu(p->val); + + if (test_bit(DISCARD_CONCURRENT, &tconn->flags) && + mutex_is_locked(&tconn->cstate_mutex)) { + conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG); + return true; + } + + mask = convert_state(mask); + val = convert_state(val); + + rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY); + conn_send_sr_reply(tconn, rv); + + return true; +} + static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { @@ -3891,7 +3911,7 @@ static struct data_cmd drbd_cmd_handler[] = { [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } }, [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } }, - [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } }, + [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } }, }; /* All handler functions that expect a sub-header get that sub-heder in From a4fbda8eca8a56527033bea8efd3190dbcf3acc6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 11:13:17 +0100 Subject: [PATCH 192/609] drbd: Allow packet handler functions that take a connection (meta connection) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 66 +++++++++++++++++++----------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 704cb708787..ba350bd562c 100644 --- 
a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -60,6 +60,11 @@ enum finish_epoch { FE_RECYCLED, }; +enum mdev_or_conn { + MDEV, + CONN, +}; + static int drbd_do_handshake(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(int vnr, void *p, void *data); @@ -3876,10 +3881,7 @@ static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, struct data_cmd { int expect_payload; size_t pkt_size; - enum { - MDEV, - CONN, - } type; + enum mdev_or_conn fa_type; /* first argument's type */ union { int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd, unsigned int to_receive); @@ -3954,7 +3956,7 @@ static void drbdd(struct drbd_tconn *tconn) } } - if (drbd_cmd_handler[pi.cmd].type == CONN) { + if (drbd_cmd_handler[pi.cmd].fa_type == CONN) { rv = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs); } else { struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); @@ -4719,27 +4721,31 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *, enum drbd_packet); + enum mdev_or_conn fa_type; /* first argument's type */ + union { + int (*mdev_fn)(struct drbd_conf *mdev, enum drbd_packet cmd); + int (*conn_fn)(struct drbd_tconn *tconn, enum drbd_packet cmd); + }; }; static struct asender_cmd asender_tbl[] = { - [P_PING] = { sizeof(struct p_header), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, - [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, - [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, - [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, - [P_OV_RESULT] = { 
sizeof(struct p_block_ack), got_OVResult }, - [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, - [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, - [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, - [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, - [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, - [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_PING] = { sizeof(struct p_header), MDEV, { got_Ping } }, + [P_PING_ACK] = { sizeof(struct p_header), MDEV, { got_PingAck } }, + [P_RECV_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, + [P_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, + [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, + [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, + [P_NEG_ACK] = { sizeof(struct p_block_ack), MDEV, { got_NegAck } }, + [P_NEG_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegDReply } }, + [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } }, + [P_OV_RESULT] = { sizeof(struct p_block_ack), MDEV, { got_OVResult } }, + [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), MDEV, { got_BarrierAck } }, + [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } }, + [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), MDEV, { got_skip } }, + [P_RS_CANCEL] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } }, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } }, + [P_RETRY_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, }; int drbd_asender(struct drbd_thread *thi) @@ -4842,10 +4848,20 @@ int drbd_asender(struct drbd_thread *thi) } } if (received == expect) { - tconn->last_received = jiffies; 
- if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd)) + bool rv; + + if (cmd->fa_type == CONN) { + rv = cmd->conn_fn(tconn, pi.cmd); + } else { + struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); + rv = cmd->mdev_fn(mdev, pi.cmd); + } + + if (!rv) goto reconnect; + tconn->last_received = jiffies; + /* the idle_timeout (ping-int) * has been restored in got_PingAck() */ if (cmd == &asender_tbl[P_PING_ACK]) From f19e4f8ba7a785c7250c04715b020d7eb8eda00a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 11:21:50 +0100 Subject: [PATCH 193/609] drbd: Converted got_Ping() and got_PingAck() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ba350bd562c..097ca71d194 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4441,15 +4441,14 @@ static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_Ping(struct drbd_tconn *tconn, enum drbd_packet cmd) { - return drbd_send_ping_ack(mdev->tconn); + return drbd_send_ping_ack(tconn); } -static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd) { - struct drbd_tconn *tconn = mdev->tconn; /* restore idle timeout */ tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags)) @@ -4729,8 +4728,8 @@ struct asender_cmd { }; static struct asender_cmd asender_tbl[] = { - [P_PING] = { sizeof(struct p_header), MDEV, { got_Ping } }, - [P_PING_ACK] = { sizeof(struct p_header), MDEV, { got_PingAck } }, + [P_PING] = { sizeof(struct p_header), CONN, { .conn_fn = got_Ping } }, + [P_PING_ACK] = { sizeof(struct 
p_header), CONN, { .conn_fn = got_PingAck } }, [P_RECV_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, From e4f78edee10ebe789564c8468d8a6b995e4045bb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 11:27:48 +0100 Subject: [PATCH 194/609] drbd: Separate connection state changes from minor dev state changes #2 New function got_conn_RqSReply() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 47 +++++++++++++++++------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 097ca71d194..50c52712715 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4412,32 +4412,37 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ +static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd) +{ + struct p_req_state_reply *p = &tconn->meta.rbuf.req_state_reply; + int retcode = be32_to_cpu(p->retcode); + + if (retcode >= SS_SUCCESS) { + set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags); + } else { + set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags); + conn_err(tconn, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); + } + wake_up(&tconn->ping_wait); + + return true; +} + static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; - struct drbd_tconn *tconn = mdev->tconn; - int retcode = be32_to_cpu(p->retcode); - if (cmd == P_STATE_CHG_REPLY) { - if (retcode >= SS_SUCCESS) { - set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); - } else { - set_bit(CL_ST_CHG_FAIL, &mdev->flags); - dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", - 
drbd_set_st_err_str(retcode), retcode); - } - wake_up(&mdev->state_wait); - } else /* conn == P_CONN_ST_CHG_REPLY */ { - if (retcode >= SS_SUCCESS) { - set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags); - } else { - set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags); - conn_err(tconn, "Requested state change failed by peer: %s (%d)\n", - drbd_set_st_err_str(retcode), retcode); - } - wake_up(&tconn->ping_wait); + if (retcode >= SS_SUCCESS) { + set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); + } else { + set_bit(CL_ST_CHG_FAIL, &mdev->flags); + dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); } + wake_up(&mdev->state_wait); + return true; } @@ -4743,7 +4748,7 @@ static struct asender_cmd asender_tbl[] = { [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } }, [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), MDEV, { got_skip } }, [P_RS_CANCEL] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } }, - [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } }, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), CONN, {.conn_fn = got_conn_RqSReply}}, [P_RETRY_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, }; From 40cbf085f584840847e3f945c10c75f7d36237fc Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 16 Mar 2011 16:52:10 +0100 Subject: [PATCH 195/609] drbd: fix conn_reconfig_start without conn_reconfig_done in drbd_adm_attach If drbd_adm_attach failed early, it left the CONFIG_PENDING bit on, blocking any further conn_reconfig_start on that connection. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d952e877f8d..daf388f4bd1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1041,7 +1041,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) - goto fail; + goto finish; mdev = adm_ctx.mdev; conn_reconfig_start(mdev->tconn); @@ -1400,8 +1400,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) force_diskless: drbd_force_state(mdev, NS(disk, D_FAILED)); drbd_md_sync(mdev); - conn_reconfig_done(mdev->tconn); fail: + conn_reconfig_done(mdev->tconn); if (nbc) { if (nbc->backing_bdev) blkdev_put(nbc->backing_bdev, @@ -1413,6 +1413,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } lc_destroy(resync_lru); + finish: drbd_adm_finish(info, retcode); return 0; } From 047e95e259e81d7b97eca10cda0aa93082531ac1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 14:43:36 +0100 Subject: [PATCH 196/609] drbd: Allow volumes to become primary only on one side Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 21 ++++++++++++++++++--- drivers/block/drbd/drbd_strings.c | 1 + include/linux/drbd.h | 3 ++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 886b996ec7b..11685658659 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -329,6 +329,18 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio dev_info(DEV, "%s\n", pb); } +static bool vol_has_primary_peer(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int minor; + + idr_for_each_entry(&tconn->volumes, mdev, minor) { + if (mdev->state.peer 
== R_PRIMARY) + return true; + } + return false; +} + /** * is_valid_state() - Returns an SS_ error code if ns is not valid * @mdev: DRBD device. @@ -349,9 +361,12 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } if (get_net_conf(mdev->tconn)) { - if (!mdev->tconn->net_conf->two_primaries && - ns.role == R_PRIMARY && ns.peer == R_PRIMARY) - rv = SS_TWO_PRIMARIES; + if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) { + if (ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; + else if (vol_has_primary_peer(mdev->tconn)) + rv = SS_O_VOL_PEER_PRI; + } put_net_conf(mdev->tconn); } diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index c44a2a60277..9a664bd2740 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = { [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", + [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", }; const char *drbd_conn_str(enum drbd_conns s) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d28fdd8fcd4..9cdb888607a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -300,7 +300,8 @@ enum drbd_state_rv { SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */ SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! 
*/ - SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */ + SS_O_VOL_PEER_PRI = -20, + SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ }; /* from drbd_strings.c */ From 6b75dced005c7f06b81934167e36bcfc690cc3a7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 17:39:12 +0100 Subject: [PATCH 197/609] drbd: conn_khelper() for user mode callbacks for connections Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 99 ++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index daf388f4bd1..ac0a175e778 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -252,43 +252,47 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) return 0; } +static void setup_khelper_env(struct drbd_tconn *tconn, char **envp) +{ + char *afs; + + if (get_net_conf(tconn)) { + switch (((struct sockaddr *)tconn->net_conf->peer_addr)->sa_family) { + case AF_INET6: + afs = "ipv6"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", + &((struct sockaddr_in6 *)tconn->net_conf->peer_addr)->sin6_addr); + break; + case AF_INET: + afs = "ipv4"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr); + break; + default: + afs = "ssocks"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr); + } + snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); + put_net_conf(tconn); + } +} + int drbd_khelper(struct drbd_conf *mdev, char *cmd) { char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL, /* Will be set to address family */ - NULL, /* Will be set to address */ + (char[20]) { }, /* address family */ + (char[60]) { }, /* address */ NULL }; - char mb[12], af[20], ad[60], *afs; + char mb[12]; char *argv[] = {usermode_helper, cmd, mb, NULL }; struct sib_info 
sib; int ret; snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); - - if (get_net_conf(mdev->tconn)) { - switch (((struct sockaddr *)mdev->tconn->net_conf->peer_addr)->sa_family) { - case AF_INET6: - afs = "ipv6"; - snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)mdev->tconn->net_conf->peer_addr)->sin6_addr); - break; - case AF_INET: - afs = "ipv4"; - snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->tconn->net_conf->peer_addr)->sin_addr); - break; - default: - afs = "ssocks"; - snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->tconn->net_conf->peer_addr)->sin_addr); - } - snprintf(af, 20, "DRBD_PEER_AF=%s", afs); - envp[3]=af; - envp[4]=ad; - put_net_conf(mdev->tconn); - } + setup_khelper_env(mdev->tconn, envp); /* The helper may take some time. * write out any unsynced meta data changes now */ @@ -317,6 +321,49 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) return ret; } +static void conn_md_sync(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int minor; + + idr_for_each_entry(&tconn->volumes, mdev, minor) + drbd_md_sync(mdev); +} + +int conn_khelper(struct drbd_tconn *tconn, char *cmd) +{ + char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + (char[20]) { }, /* address family */ + (char[60]) { }, /* address */ + NULL }; + char *argv[] = {usermode_helper, cmd, tconn->name, NULL }; + int ret; + + setup_khelper_env(tconn, envp); + conn_md_sync(tconn); + + conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name); + /* TODO: conn_bcast_event() ?? */ + + ret = call_usermodehelper(usermode_helper, argv, envp, 1); + if (ret) + conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, tconn->name, + (ret >> 8) & 0xff, ret); + else + conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, tconn->name, + (ret >> 8) & 0xff, ret); + /* TODO: conn_bcast_event() ?? 
*/ + + if (ret < 0) /* Ignore any ERRNOs we got. */ + ret = 0; + + return ret; +} + enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) { char *ex_to_string; From cb5bd4d19b46c220b1ac8462a3da01767dd99488 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 2 Nov 2012 16:43:04 +0100 Subject: [PATCH 198/609] xen/blkback: persistent-grants fixes This patch contains fixes for persistent grants implementation v2: * handle == 0 is a valid handle, so initialize grants in blkback setting the handle to BLKBACK_INVALID_HANDLE instead of 0. Reported by Konrad Rzeszutek Wilk. * new_map is a boolean, use "true" or "false" instead of 1 and 0. Reported by Konrad Rzeszutek Wilk. * blkfront announces the persistent-grants feature as feature-persistent-grants, use feature-persistent instead which is consistent with blkback and the public Xen headers. * Add a consistency check in blkfront to make sure we don't try to access segments that have not been set. Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Roger Pau Monne [v1: The new_map int->bool had already been changed] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 9 ++++++--- drivers/block/xen-blkback/xenbus.c | 2 +- drivers/block/xen-blkfront.c | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d7dd5cbdac5..a05961683ef 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -512,7 +512,7 @@ static int xen_blkbk_map(struct blkif_request *req, * not mapped but we have room for it */ new_map = true; - persistent_gnt = kzalloc( + persistent_gnt = kmalloc( sizeof(struct persistent_gnt), GFP_KERNEL); if (!persistent_gnt) @@ -523,6 +523,7 @@ static int xen_blkbk_map(struct blkif_request *req, return -ENOMEM; } persistent_gnt->gnt = req->u.rw.seg[i].gref; + persistent_gnt->handle = BLKBACK_INVALID_HANDLE; pages_to_gnt[segs_to_map] = 
persistent_gnt->page; @@ -584,7 +585,8 @@ static int xen_blkbk_map(struct blkif_request *req, */ bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); for (i = 0, j = 0; i < nseg; i++) { - if (!persistent_gnts[i] || !persistent_gnts[i]->handle) { + if (!persistent_gnts[i] || + persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) { /* This is a newly mapped grant */ BUG_ON(j >= segs_to_map); if (unlikely(map[j].status != 0)) { @@ -601,7 +603,8 @@ static int xen_blkbk_map(struct blkif_request *req, } } if (persistent_gnts[i]) { - if (!persistent_gnts[i]->handle) { + if (persistent_gnts[i]->handle == + BLKBACK_INVALID_HANDLE) { /* * If this is a new persistent grant * save the handler diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index b2250265308..a03ecbb0044 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -760,7 +760,7 @@ static int connect_ring(struct backend_info *be) return -1; } err = xenbus_gather(XBT_NIL, dev->otherend, - "feature-persistent-grants", "%u", + "feature-persistent", "%u", &pers_grants, NULL); if (err) pers_grants = 0; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 911d733d21b..f1de806b0a6 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -852,6 +852,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, rq_for_each_segment(bvec, s->request, iter) { BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); i = offset >> PAGE_SHIFT; + BUG_ON(i >= s->req.u.rw.nr_segments); shared_data = kmap_atomic( pfn_to_page(s->grants_used[i]->pfn)); bvec_data = bvec_kmap_irq(bvec, &flags); @@ -1069,7 +1070,7 @@ again: goto abort_transaction; } err = xenbus_printf(xbt, dev->nodename, - "feature-persistent-grants", "%u", 1); + "feature-persistent", "%u", 1); if (err) dev_warn(&dev->dev, "writing persistent grants feature to xenbus"); From f399002e68e626e7bc443e6fcab1772704cc197f 
Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Mar 2011 14:31:09 +0100 Subject: [PATCH 199/609] drbd: distribute former syncer_conf settings to disk, connection, and resource level This commit breaks the API again. Move per-volume former syncer options into disk_conf. Move per-connection former syncer options into net_conf. Renamed the remaining sync_conf to res_opts. Syncer settings have been changeable at runtime, so we need to prepare for these settings to be runtime-changeable in their new home as well. Introduce new configuration operations, and share the netlink attribute between "attach" (create new disk) and "disk-opts" (change options). Same for "connect" and "net-opts". Some fields cannot be changed at runtime, however. Introduce a new flag GENLA_F_INVARIANT to be able to trigger on that in the generated validation and assignment functions. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +- drivers/block/drbd/drbd_main.c | 72 ++-- drivers/block/drbd/drbd_nl.c | 546 +++++++++++++++++++---------- drivers/block/drbd/drbd_receiver.c | 51 +-- drivers/block/drbd/drbd_state.c | 4 +- drivers/block/drbd/drbd_worker.c | 50 +-- include/linux/drbd_genl.h | 133 ++++--- include/linux/drbd_limits.h | 2 + include/linux/genl_magic_func.h | 49 ++- include/linux/genl_magic_struct.h | 18 +- 10 files changed, 570 insertions(+), 365 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d6e7e657e7a..bc265f3733c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -860,7 +860,7 @@ struct drbd_md { s32 bm_offset; /* signed relative sector offset to bitmap */ /* u32 al_nr_extents; important for restoring the AL - * is stored into sync_conf.al_extents, which in turn + * is stored into ldev->dc.al_extents, which in turn * gets applied to act_log->nr_elements */ }; @@ -929,6 +929,7 @@ struct drbd_tconn { /* is a resource from the config file */ atomic_t
net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ + struct res_opts res_opts; struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ @@ -945,6 +946,8 @@ struct drbd_tconn { /* is a resource from the config file */ struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; void *int_dig_out; void *int_dig_in; void *int_dig_vv; @@ -963,7 +966,6 @@ struct drbd_conf { unsigned long flags; /* configured by drbdsetup */ - struct syncer_conf sync_conf; struct drbd_backing_dev *ldev __protected_by(local); sector_t p_size; /* partner's disk size */ @@ -1037,8 +1039,6 @@ struct drbd_conf { /* size of out-of-sync range in sectors. */ sector_t ov_last_oos_size; unsigned long ov_left; /* in bits */ - struct crypto_hash *csums_tfm; - struct crypto_hash *verify_tfm; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -1188,7 +1188,7 @@ extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 -extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); +extern int drbd_send_sync_param(struct drbd_conf *mdev); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 79a0e042252..bdb12723585 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -784,7 +784,7 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, return ok; } -int 
drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) +int drbd_send_sync_param(struct drbd_conf *mdev) { struct p_rs_param_95 *p; struct socket *sock; @@ -793,7 +793,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) - + strlen(mdev->sync_conf.verify_alg) + 1 + + strlen(mdev->tconn->net_conf->verify_alg) + 1 : apv <= 94 ? sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); @@ -812,16 +812,25 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - p->rate = cpu_to_be32(sc->rate); - p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead); - p->c_delay_target = cpu_to_be32(sc->c_delay_target); - p->c_fill_target = cpu_to_be32(sc->c_fill_target); - p->c_max_rate = cpu_to_be32(sc->c_max_rate); + if (get_ldev(mdev)) { + p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate); + p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead); + p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target); + p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target); + p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate); + put_ldev(mdev); + } else { + p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); + p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); + p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); + p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); + } if (apv >= 88) - strcpy(p->verify_alg, mdev->sync_conf.verify_alg); + strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, mdev->sync_conf.csums_alg); + strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); } else @@ -1043,7 +1052,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, int bits; /* may we use this 
feature? */ - if ((mdev->sync_conf.use_rle == 0) || + if ((mdev->tconn->net_conf->use_rle == 0) || (mdev->tconn->agreed_pro_version < 90)) return 0; @@ -1790,26 +1799,8 @@ static int drbd_release(struct gendisk *gd, fmode_t mode) static void drbd_set_defaults(struct drbd_conf *mdev) { - /* This way we get a compile error when sync_conf grows, - and we forgot to initialize it here */ - mdev->sync_conf = (struct syncer_conf) { - /* .rate = */ DRBD_RATE_DEF, - /* .after = */ DRBD_AFTER_DEF, - /* .al_extents = */ DRBD_AL_EXTENTS_DEF, - /* .verify_alg = */ {}, 0, - /* .cpu_mask = */ {}, 0, - /* .csums_alg = */ {}, 0, - /* .use_rle = */ 0, - /* .on_no_data = */ DRBD_ON_NO_DATA_DEF, - /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF, - /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF, - /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF, - /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF, - /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF - }; - - /* Have to use that way, because the layout differs between - big endian and little endian */ + /* Beware! 
The actual layout differs + * between big endian and little endian */ mdev->state = (union drbd_state) { { .role = R_SECONDARY, .peer = R_UNKNOWN, @@ -2286,6 +2277,11 @@ struct drbd_tconn *drbd_new_tconn(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + tconn->res_opts = (struct res_opts) { + {}, 0, /* cpu_mask */ + DRBD_ON_NO_DATA_DEF, /* on_no_data */ + }; + mutex_lock(&drbd_cfg_mutex); list_add_tail(&tconn->all_tconn, &drbd_tconns); mutex_unlock(&drbd_cfg_mutex); @@ -2559,10 +2555,10 @@ void drbd_free_sock(struct drbd_tconn *tconn) void drbd_free_resources(struct drbd_conf *mdev) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = NULL; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = NULL; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = NULL; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = NULL; crypto_free_hash(mdev->tconn->cram_hmac_tfm); mdev->tconn->cram_hmac_tfm = NULL; crypto_free_hash(mdev->tconn->integrity_w_tfm); @@ -2589,7 +2585,7 @@ struct meta_data_on_disk { u32 md_size_sect; u32 al_offset; /* offset to this block */ u32 al_nr_extents; /* important for restoring the AL */ - /* `-- act_log->nr_elements <-- sync_conf.al_extents */ + /* `-- act_log->nr_elements <-- ldev->dc.al_extents */ u32 bm_offset; /* offset to the bitmap, from here */ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ u32 la_peer_max_bio_size; /* last peer max_bio_size */ @@ -2715,7 +2711,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); - mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); + bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); spin_lock_irq(&mdev->tconn->req_lock); @@ -2727,8 
+2723,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - if (mdev->sync_conf.al_extents < 7) - mdev->sync_conf.al_extents = 127; + if (bdev->dc.al_extents < 7) + bdev->dc.al_extents = 127; err: mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ac0a175e778..18cd2ed4e8c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -53,8 +53,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); @@ -66,7 +68,7 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); /* .dumpit */ @@ -170,7 +172,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { struct nlattr *nla; /* parse and validate only */ - err = drbd_cfg_context_from_attrs(NULL, 
info->attrs); + err = drbd_cfg_context_from_attrs(NULL, info); if (err) goto fail; @@ -616,6 +618,7 @@ static const char *from_attrs_err_to_txt(int err) { return err == -ENOMSG ? "required attribute missing" : err == -EOPNOTSUPP ? "unknown mandatory attribute" : + err == -EEXIST ? "can not change invariant setting" : "invalid attribute value"; } @@ -633,7 +636,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { - err = set_role_parms_from_attrs(&parms, info->attrs); + err = set_role_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -898,24 +901,24 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass * failed, and 0 on success. You should call drbd_md_sync() after you called * this function. */ -static int drbd_check_al_size(struct drbd_conf *mdev) +static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) { struct lru_cache *n, *t; struct lc_element *e; unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) - mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN)) + dc->al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && - mdev->act_log->nr_elements == mdev->sync_conf.al_extents) + mdev->act_log->nr_elements == dc->al_extents) return 0; in_use = 0; t = mdev->act_log; n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, - mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); + dc->al_extents, sizeof(struct lc_element), 0); if (n == NULL) { dev_err(DEV, "Cannot allocate act_log lru!\n"); @@ -1069,6 +1072,114 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_conf *mdev; + struct 
disk_conf *ndc; /* new disk conf */ + int err, fifo_size; + int *rs_plan_s = NULL; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + + /* we also need a disk + * to change the options on */ + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto out; + } + +/* FIXME freeze IO, cluster wide. + * + * We should make sure no-one uses + * some half-updated struct when we + * assign it later. */ + + ndc = kmalloc(sizeof(*ndc), GFP_KERNEL); + if (!ndc) { + retcode = ERR_NOMEM; + goto fail; + } + + memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc)); + err = disk_conf_from_attrs_for_change(ndc, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + } + + if (!expect(ndc->resync_rate >= 1)) + ndc->resync_rate = 1; + + /* clip to allowed range */ + if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN)) + ndc->al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX)) + ndc->al_extents = DRBD_AL_EXTENTS_MAX; + + /* most sanity checks done, try to assign the new sync-after + * dependency. need to hold the global lock in there, + * to avoid a race in the dependency loop check. 
*/ + retcode = drbd_alter_sa(mdev, ndc->resync_after); + if (retcode != NO_ERROR) + goto fail; + + fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; + if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { + rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + if (!rs_plan_s) { + dev_err(DEV, "kmalloc of fifo_buffer failed"); + retcode = ERR_NOMEM; + goto fail; + } + } + + if (fifo_size != mdev->rs_plan_s.size) { + kfree(mdev->rs_plan_s.values); + mdev->rs_plan_s.values = rs_plan_s; + mdev->rs_plan_s.size = fifo_size; + mdev->rs_planed = 0; + rs_plan_s = NULL; + } + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + drbd_al_shrink(mdev); + err = drbd_check_al_size(mdev, ndc); + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + if (err) { + retcode = ERR_NOMEM; + goto fail; + } + + /* FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. + */ + mdev->ldev->dc = *ndc; + + drbd_md_sync(mdev); + + + if (mdev->state.conn >= C_CONNECTED) + drbd_send_sync_param(mdev); + + fail: + put_ldev(mdev); + kfree(ndc); + kfree(rs_plan_s); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { struct drbd_conf *mdev; @@ -1111,12 +1222,29 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; - nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; - nbc->dc.fencing = DRBD_FENCING_DEF; - nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; + nbc->dc = (struct disk_conf) { + {}, 0, /* backing_dev */ + {}, 0, /* meta_dev */ + 0, /* meta_dev_idx */ + DRBD_DISK_SIZE_SECT_DEF, /* disk_size */ + DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */ + DRBD_ON_IO_ERROR_DEF, /* on_io_error */ + DRBD_FENCING_DEF, /* fencing */ + DRBD_RATE_DEF, /* resync_rate */ + DRBD_AFTER_DEF, /* resync_after */ + DRBD_AL_EXTENTS_DEF, /* al_extents */ + DRBD_C_PLAN_AHEAD_DEF, /* 
c_plan_ahead */ + DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */ + DRBD_C_FILL_TARGET_DEF, /* c_fill_target */ + DRBD_C_MAX_RATE_DEF, /* c_max_rate */ + DRBD_C_MIN_RATE_DEF, /* c_min_rate */ + 0, /* no_disk_barrier */ + 0, /* no_disk_flush */ + 0, /* no_disk_drain */ + 0, /* no_md_flush */ + }; - err = disk_conf_from_attrs(&nbc->dc, info->attrs); + err = disk_conf_from_attrs(&nbc->dc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1267,7 +1395,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } /* Since we are diskless, fix the activity log first... */ - if (drbd_check_al_size(mdev)) { + if (drbd_check_al_size(mdev, &nbc->dc)) { retcode = ERR_NOMEM; goto force_diskless_dec; } @@ -1498,6 +1626,158 @@ out: return 0; } +static bool conn_resync_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_PAUSED_SYNC_S || + mdev->state.conn == C_PAUSED_SYNC_T) + return true; + } + return false; +} + +static bool conn_ov_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_VERIFY_S || + mdev->state.conn == C_VERIFY_T) + return true; + } + return false; +} + +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_tconn *tconn; + struct net_conf *new_conf = NULL; + int err; + int ovr; /* online verify running */ + int rsr; /* re-sync running */ + struct crypto_hash *verify_tfm = NULL; + struct crypto_hash *csums_tfm = NULL; + + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tconn = adm_ctx.tconn; + + new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + 
retcode = ERR_NOMEM; + goto out; + } + + /* we also need a net config + * to change the options on */ + if (!get_net_conf(tconn)) { + drbd_msg_put_info("net conf missing, try connect"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + conn_reconfig_start(tconn); + + memcpy(new_conf, tconn->net_conf, sizeof(*new_conf)); + err = net_conf_from_attrs_for_change(new_conf, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } + + /* re-sync running */ + rsr = conn_resync_running(tconn); + if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) { + retcode = ERR_CSUMS_RESYNC_RUNNING; + goto fail; + } + + if (!rsr && new_conf->csums_alg[0]) { + csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(csums_tfm)) { + csums_tfm = NULL; + retcode = ERR_CSUMS_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { + retcode = ERR_CSUMS_ALG_ND; + goto fail; + } + } + + /* online verify running */ + ovr = conn_ov_running(tconn); + if (ovr) { + if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) { + retcode = ERR_VERIFY_RUNNING; + goto fail; + } + } + + if (!ovr && new_conf->verify_alg[0]) { + verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(verify_tfm)) { + verify_tfm = NULL; + retcode = ERR_VERIFY_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { + retcode = ERR_VERIFY_ALG_ND; + goto fail; + } + } + + + /* For now, use struct assignment, not pointer assignment. + * We don't have any means to determine who might still + * keep a local alias into the struct, + * so we cannot just free it and hope for the best :( + * FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. 
+ */ + *tconn->net_conf = *new_conf; + + if (!rsr) { + crypto_free_hash(tconn->csums_tfm); + tconn->csums_tfm = csums_tfm; + csums_tfm = NULL; + } + if (!ovr) { + crypto_free_hash(tconn->verify_tfm); + tconn->verify_tfm = verify_tfm; + verify_tfm = NULL; + } + + if (tconn->cstate >= C_WF_REPORT_PARAMS) + drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); + + fail: + crypto_free_hash(csums_tfm); + crypto_free_hash(verify_tfm); + kfree(new_conf); + put_net_conf(tconn); + conn_reconfig_done(tconn); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { char hmac_name[CRYPTO_MAX_ALG_NAME]; @@ -1531,33 +1811,47 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } /* allocation not in the IO path, cqueue thread context */ - new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; goto fail; } - new_conf->timeout = DRBD_TIMEOUT_DEF; - new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; - new_conf->ping_int = DRBD_PING_INT_DEF; - new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; - new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - new_conf->ko_count = DRBD_KO_COUNT_DEF; - new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - new_conf->want_lose = 0; - new_conf->two_primaries = 0; - new_conf->wire_protocol = DRBD_PROT_C; - new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; - new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; - new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; - new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; + *new_conf = (struct net_conf) { + {}, 0, /* my_addr */ + {}, 0, /* peer_addr */ + {}, 0, /* shared_secret */ + {}, 0, /* 
cram_hmac_alg */ + {}, 0, /* integrity_alg */ + {}, 0, /* verify_alg */ + {}, 0, /* csums_alg */ + DRBD_PROTOCOL_DEF, /* wire_protocol */ + DRBD_CONNECT_INT_DEF, /* try_connect_int */ + DRBD_TIMEOUT_DEF, /* timeout */ + DRBD_PING_INT_DEF, /* ping_int */ + DRBD_PING_TIMEO_DEF, /* ping_timeo */ + DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */ + DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */ + DRBD_KO_COUNT_DEF, /* ko_count */ + DRBD_MAX_BUFFERS_DEF, /* max_buffers */ + DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */ + DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */ + DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */ + DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */ + DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */ + DRBD_RR_CONFLICT_DEF, /* rr_conflict */ + DRBD_ON_CONGESTION_DEF, /* on_congestion */ + DRBD_CONG_FILL_DEF, /* cong_fill */ + DRBD_CONG_EXTENTS_DEF, /* cong_extents */ + 0, /* two_primaries */ + 0, /* want_lose */ + 0, /* no_cork */ + 0, /* always_asbp */ + 0, /* dry_run */ + 0, /* use_rle */ + }; - err = net_conf_from_attrs(new_conf, info->attrs); + err = net_conf_from_attrs(new_conf, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1789,7 +2083,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) tconn = adm_ctx.tconn; memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { - err = disconnect_parms_from_attrs(&parms, info->attrs); + err = disconnect_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1848,7 +2142,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) memset(&rs, 0, sizeof(struct resize_parms)); if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { - err = resize_parms_from_attrs(&rs, info->attrs); + err = resize_parms_from_attrs(&rs, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1904,26 +2198,21 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info 
*info) return 0; } -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; - int err; - int ovr; /* online verify running */ - int rsr; /* re-sync running */ - struct crypto_hash *verify_tfm = NULL; - struct crypto_hash *csums_tfm = NULL; - struct syncer_conf sc; cpumask_var_t new_cpu_mask; + struct drbd_tconn *tconn; int *rs_plan_s = NULL; - int fifo_size; + struct res_opts sc; + int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto fail; - mdev = adm_ctx.mdev; + tconn = adm_ctx.tconn; if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { retcode = ERR_NOMEM; @@ -1933,172 +2222,43 @@ int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) if (((struct drbd_genlmsghdr*)info->userhdr)->flags & DRBD_GENL_F_SET_DEFAULTS) { - memset(&sc, 0, sizeof(struct syncer_conf)); - sc.rate = DRBD_RATE_DEF; - sc.after = DRBD_AFTER_DEF; - sc.al_extents = DRBD_AL_EXTENTS_DEF; + memset(&sc, 0, sizeof(struct res_opts)); sc.on_no_data = DRBD_ON_NO_DATA_DEF; - sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; - sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF; - sc.c_fill_target = DRBD_C_FILL_TARGET_DEF; - sc.c_max_rate = DRBD_C_MAX_RATE_DEF; - sc.c_min_rate = DRBD_C_MIN_RATE_DEF; } else - memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); + sc = tconn->res_opts; - err = syncer_conf_from_attrs(&sc, info->attrs); + err = res_opts_from_attrs(&sc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - /* re-sync running */ - rsr = ( mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_PAUSED_SYNC_S || - mdev->state.conn == C_PAUSED_SYNC_T ); - - if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) { 
- retcode = ERR_CSUMS_RESYNC_RUNNING; - goto fail; - } - - if (!rsr && sc.csums_alg[0]) { - csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(csums_tfm)) { - csums_tfm = NULL; - retcode = ERR_CSUMS_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { - retcode = ERR_CSUMS_ALG_ND; - goto fail; - } - } - - /* online verify running */ - ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T); - - if (ovr) { - if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) { - retcode = ERR_VERIFY_RUNNING; - goto fail; - } - } - - if (!ovr && sc.verify_alg[0]) { - verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(verify_tfm)) { - verify_tfm = NULL; - retcode = ERR_VERIFY_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { - retcode = ERR_VERIFY_ALG_ND; - goto fail; - } - } - /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { err = __bitmap_parse(sc.cpu_mask, 32, 0, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); + conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); retcode = ERR_CPU_MASK_PARSE; goto fail; } } - if (!expect(sc.rate >= 1)) - sc.rate = 1; - /* clip to allowed range */ - if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) - sc.al_extents = DRBD_AL_EXTENTS_MIN; - if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) - sc.al_extents = DRBD_AL_EXTENTS_MAX; + tconn->res_opts = sc; - /* most sanity checks done, try to assign the new sync-after - * dependency. need to hold the global lock in there, - * to avoid a race in the dependency loop check. 
*/ - retcode = drbd_alter_sa(mdev, sc.after); - if (retcode != NO_ERROR) - goto fail; - - fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { - rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); - if (!rs_plan_s) { - dev_err(DEV, "kmalloc of fifo_buffer failed"); - retcode = ERR_NOMEM; - goto fail; - } + if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(tconn); + tconn->receiver.reset_cpu_mask = 1; + tconn->asender.reset_cpu_mask = 1; + tconn->worker.reset_cpu_mask = 1; } - /* ok, assign the rest of it as well. - * lock against receive_SyncParam() */ - spin_lock(&mdev->peer_seq_lock); - mdev->sync_conf = sc; - - if (!rsr) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; - csums_tfm = NULL; - } - - if (!ovr) { - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; - verify_tfm = NULL; - } - - if (fifo_size != mdev->rs_plan_s.size) { - kfree(mdev->rs_plan_s.values); - mdev->rs_plan_s.values = rs_plan_s; - mdev->rs_plan_s.size = fifo_size; - mdev->rs_planed = 0; - rs_plan_s = NULL; - } - - spin_unlock(&mdev->peer_seq_lock); - - if (get_ldev(mdev)) { - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - drbd_al_shrink(mdev); - err = drbd_check_al_size(mdev); - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - - put_ldev(mdev); - drbd_md_sync(mdev); - - if (err) { - retcode = ERR_NOMEM; - goto fail; - } - } - - if (mdev->state.conn >= C_CONNECTED) - drbd_send_sync_param(mdev, &sc); - - if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) { - cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(mdev->tconn); - mdev->tconn->receiver.reset_cpu_mask = 1; - mdev->tconn->asender.reset_cpu_mask = 1; - mdev->tconn->worker.reset_cpu_mask = 1; - } - - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: kfree(rs_plan_s); free_cpumask_var(new_cpu_mask); - 
crypto_free_hash(csums_tfm); - crypto_free_hash(verify_tfm); drbd_adm_finish(info, retcode); return 0; @@ -2307,6 +2467,9 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) goto nla_put_failure; + if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) + goto nla_put_failure; + if (got_ldev) if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) goto nla_put_failure; @@ -2314,9 +2477,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) goto nla_put_failure; - if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive)) - goto nla_put_failure; - nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; @@ -2532,7 +2692,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) /* resume from last known position, if possible */ struct start_ov_parms parms = { .ov_start_sector = mdev->ov_start_sector }; - int err = start_ov_parms_from_attrs(&parms, info->attrs); + int err = start_ov_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -2568,7 +2728,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; memset(&args, 0, sizeof(args)); if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { - err = new_c_uuid_parms_from_attrs(&args, info->attrs); + err = new_c_uuid_parms_from_attrs(&args, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 50c52712715..c8c826b2444 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -763,7 +763,7 @@ int drbd_connected(int vnr, void *p, void *data) &mdev->tconn->cstate_mutex : &mdev->own_state_mutex; - ok &= drbd_send_sync_param(mdev, 
&mdev->sync_conf); + ok &= drbd_send_sync_param(mdev); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); ok &= drbd_send_state(mdev); @@ -2085,7 +2085,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) int throttle = 0; /* feature disabled? */ - if (mdev->sync_conf.c_min_rate == 0) + if (mdev->ldev->dc.c_min_rate == 0) return 0; spin_lock_irq(&mdev->al_lock); @@ -2125,7 +2125,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) db = mdev->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); - if (dbdt > mdev->sync_conf.c_min_rate) + if (dbdt > mdev->ldev->dc.c_min_rate) throttle = 1; } return throttle; @@ -3001,7 +3001,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size) return false; - mdev->sync_conf.rate = be32_to_cpu(p->rate); + if (get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + put_ldev(mdev); + } if (apv >= 88) { if (apv == 88) { @@ -3029,10 +3032,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { + if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg, p->verify_alg); goto disconnect; } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3043,10 +3046,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { + if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg, p->csums_alg); goto disconnect; } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3057,37 +3060,39 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv > 94) { - mdev->sync_conf.rate = be32_to_cpu(p->rate); - mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); - mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); - mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); - mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); + if (apv > 94 && get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); + mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target); + mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target); + mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate); - fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; + fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); + put_ldev(mdev); goto disconnect; } } + put_ldev(mdev); } spin_lock(&mdev->peer_seq_lock); /* lock against drbd_nl_syncer_conf() */ if (verify_tfm) { - strcpy(mdev->sync_conf.verify_alg, p->verify_alg); - mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; + strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = verify_tfm; dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(mdev->sync_conf.csums_alg, p->csums_alg); - mdev->sync_conf.csums_alg_len = 
strlen(p->csums_alg) + 1; - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; + strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = csums_tfm; dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); } if (fifo_size != mdev->rs_plan_s.size) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 11685658659..77fad527fb1 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -402,7 +402,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_CONNECTED_OUTDATES; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) + (mdev->tconn->net_conf->verify_alg[0] == 0)) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && @@ -668,7 +668,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && + if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a730520e468..005876b32f7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -310,12 +310,12 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { sector_t sector = 
peer_req->i.sector; unsigned int size = peer_req->i.size; - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); /* Free peer_req and pages before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on @@ -451,13 +451,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev) spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ - steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ - want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps; + want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps; } else { /* normal path */ - want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target : - sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10); + want = mdev->ldev->dc.c_fill_target ? 
mdev->ldev->dc.c_fill_target : + sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10); } correction = want - mdev->rs_in_flight - mdev->rs_planed; @@ -476,7 +476,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev) if (req_sect < 0) req_sect = 0; - max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ; + max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ; if (req_sect > max_sect) req_sect = max_sect; @@ -492,11 +492,11 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ + if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - mdev->c_sync_rate = mdev->sync_conf.rate; + mdev->c_sync_rate = mdev->ldev->dc.resync_rate; number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } @@ -619,7 +619,7 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) { + if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) { switch (read_for_csum(mdev, sector, size)) { case -EIO: /* Disk failure */ put_ldev(mdev); @@ -810,7 +810,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (mdev->csums_tfm && mdev->rs_total) { + if (mdev->tconn->csums_tfm && mdev->rs_total) { const unsigned long s = mdev->rs_same_csum; const unsigned long t = mdev->rs_total; const int ratio = @@ -1019,13 +1019,13 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) /* quick hack to try to avoid a race against reconfiguration. 
* a real fix would be much more involved, * introducing more locking mechanisms */ - if (mdev->csums_tfm) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + if (mdev->tconn->csums_tfm) { + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); D_ASSERT(digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } @@ -1069,7 +1069,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { ok = 0; /* terminate the connection in case the allocation failed */ @@ -1077,7 +1077,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) } if (likely(!(peer_req->flags & EE_WAS_ERROR))) - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); else memset(digest, 0, digest_size); @@ -1141,10 +1141,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) di = peer_req->digest; if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); D_ASSERT(digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); @@ -1319,9 +1319,9 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) struct drbd_conf *odev = mdev; while (1) { - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return 1; - odev = minor_to_mdev(odev->sync_conf.after); + odev = 
minor_to_mdev(odev->ldev->dc.resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1408,11 +1408,11 @@ static int sync_after_error(struct drbd_conf *mdev, int o_minor) return ERR_SYNC_AFTER_CYCLE; /* dependency chain ends here, no cycles. */ - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(odev->sync_conf.after); + odev = minor_to_mdev(odev->ldev->dc.resync_after); } } @@ -1424,7 +1424,7 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) write_lock_irq(&global_state_lock); retcode = sync_after_error(mdev, na); if (retcode == NO_ERROR) { - mdev->sync_conf.after = na; + mdev->ldev->dc.resync_after = na; do { changes = _drbd_pause_after(mdev); changes |= _drbd_resume_next(mdev); @@ -1637,7 +1637,7 @@ int drbd_worker(struct drbd_thread *thi) struct drbd_work *w = NULL; struct drbd_conf *mdev; LIST_HEAD(work_list); - int minor, intr = 0; + int vnr, intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); @@ -1722,7 +1722,7 @@ int drbd_worker(struct drbd_thread *thi) spin_unlock_irq(&tconn->data.work.q_lock); drbd_thread_stop(&tconn->receiver); - idr_for_each_entry(&tconn->volumes, mdev, minor) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. * wait here for the exiting receiver. 
*/ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index a07d69279b1..938e8560a83 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -102,66 +102,73 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, - __u64_field(1, GENLA_F_MANDATORY, disk_size) - __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) - __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) - __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) - __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) + __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) + __u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + + /* use the resize command to try and change the disk_size */ + __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) + /* we could change the max_bio_bvecs, + * but it won't propagate through the stack */ + __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) __u32_field(7, GENLA_F_MANDATORY, fencing) - __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) - __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) - __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) - __flg_field(11, GENLA_F_MANDATORY, no_md_flush) - __flg_field(12, GENLA_F_MANDATORY, use_bmbv) + + __u32_field(8, GENLA_F_MANDATORY, resync_rate) + __u32_field(9, GENLA_F_MANDATORY, resync_after) + __u32_field(10, GENLA_F_MANDATORY, al_extents) + __u32_field(11, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(12, GENLA_F_MANDATORY, c_delay_target) + __u32_field(13, GENLA_F_MANDATORY, c_fill_target) + __u32_field(14, GENLA_F_MANDATORY, c_max_rate) + __u32_field(15, GENLA_F_MANDATORY, c_min_rate) + + __flg_field(16, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(17, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(18, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(19, GENLA_F_MANDATORY, 
no_md_flush) + ) -GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, - __u32_field(1, GENLA_F_MANDATORY, rate) - __u32_field(2, GENLA_F_MANDATORY, after) - __u32_field(3, GENLA_F_MANDATORY, al_extents) - __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) - __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __flg_field(7, GENLA_F_MANDATORY, use_rle) - __u32_field(8, GENLA_F_MANDATORY, on_no_data) - __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) - __u32_field(10, GENLA_F_MANDATORY, c_delay_target) - __u32_field(11, GENLA_F_MANDATORY, c_fill_target) - __u32_field(12, GENLA_F_MANDATORY, c_max_rate) - __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, + __str_field(1, GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field(2, GENLA_F_MANDATORY, on_no_data) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) + __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) + __str_field(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field(4, GENLA_F_REQUIRED, my_addr, 128) - __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) - __u32_field(6, GENLA_F_REQUIRED, wire_protocol) - __u32_field(7, GENLA_F_MANDATORY, try_connect_int) - __u32_field(8, GENLA_F_MANDATORY, timeout) - __u32_field(9, GENLA_F_MANDATORY, ping_int) - __u32_field(10, GENLA_F_MANDATORY, ping_timeo) - __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) - __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) - __u32_field(13, GENLA_F_MANDATORY, ko_count) - __u32_field(14, GENLA_F_MANDATORY, max_buffers) - __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) - __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) - 
__u32_field(17, GENLA_F_MANDATORY, after_sb_0p) - __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) - __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) - __u32_field(20, GENLA_F_MANDATORY, rr_conflict) - __u32_field(21, GENLA_F_MANDATORY, on_congestion) - __u32_field(22, GENLA_F_MANDATORY, cong_fill) - __u32_field(23, GENLA_F_MANDATORY, cong_extents) - __flg_field(24, GENLA_F_MANDATORY, two_primaries) - __flg_field(25, GENLA_F_MANDATORY, want_lose) - __flg_field(26, GENLA_F_MANDATORY, no_cork) - __flg_field(27, GENLA_F_MANDATORY, always_asbp) - __flg_field(28, GENLA_F_MANDATORY, dry_run) + __str_field(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field(8, GENLA_F_MANDATORY, wire_protocol) + __u32_field(9, GENLA_F_MANDATORY, try_connect_int) + __u32_field(10, GENLA_F_MANDATORY, timeout) + __u32_field(11, GENLA_F_MANDATORY, ping_int) + __u32_field(12, GENLA_F_MANDATORY, ping_timeo) + __u32_field(13, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(14, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(15, GENLA_F_MANDATORY, ko_count) + __u32_field(16, GENLA_F_MANDATORY, max_buffers) + __u32_field(17, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(18, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(19, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(20, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(21, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(22, GENLA_F_MANDATORY, rr_conflict) + __u32_field(23, GENLA_F_MANDATORY, on_congestion) + __u32_field(24, GENLA_F_MANDATORY, cong_fill) + __u32_field(25, GENLA_F_MANDATORY, cong_extents) + __flg_field(26, GENLA_F_MANDATORY, two_primaries) + __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) + __flg_field(28, GENLA_F_MANDATORY, no_cork) + __flg_field(29, GENLA_F_MANDATORY, always_asbp) + __flg_field(30, 
GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) + __flg_field(31, GENLA_F_MANDATORY, use_rle) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, @@ -270,11 +277,10 @@ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on replication links */ -GENL_op(DRBD_ADM_SYNCER, 9, - GENL_doit(drbd_adm_syncer), +GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, + GENL_doit(drbd_adm_resource_opts), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY) ) GENL_op( @@ -284,16 +290,28 @@ GENL_op( GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) ) +GENL_op( + DRBD_ADM_CHG_NET_OPTS, 29, + GENL_doit(drbd_adm_net_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on minors */ GENL_op(DRBD_ADM_ATTACH, 12, GENL_doit(drbd_adm_attach), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) ) +GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, + GENL_doit(drbd_adm_disk_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED) +) + GENL_op( DRBD_ADM_RESIZE, 13, GENL_doit(drbd_adm_resize), @@ -301,7 +319,6 @@ GENL_op( GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) ) - /* operates on all volumes within a resource */ GENL_op( DRBD_ADM_PRIMARY, 14, GENL_doit(drbd_adm_set_role), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 22920a8af4e..659a8eb3883 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -166,5 +166,7 @@ #define 
DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_PROTOCOL_DEF DRBD_PROT_C + #undef RANGE #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index c8c67239f61..e458282a372 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -190,11 +190,12 @@ static struct nlattr *nested_attr_tb[128]; #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ - /* static, potentially unused */ \ -int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +/* *_from_attrs functions are static, but potentially unused */ \ +static int __ ## s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info, bool exclude_invariants) \ { \ const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ - struct nlattr *tla = tb[tag_number]; \ + struct nlattr *tla = info->attrs[tag_number]; \ struct nlattr **ntb = nested_attr_tb; \ struct nlattr *nla; \ int err; \ @@ -211,33 +212,49 @@ int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ \ s_fields \ return 0; \ -} +} __attribute__((unused)) \ +static int s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, false); \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs_for_change(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, true); \ +} __attribute__((unused)) \ -#undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) 
\ nla = ntb[__nla_type(attr_nr)]; \ if (nla) { \ - if (s) \ - s->name = __get(nla); \ - DPRINT_FIELD("<<", nla_type, name, s, nla); \ + if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + pr_info("<< must not change invariant attr: %s\n", #name); \ + return -EEXIST; \ + } \ + assignment; \ + } else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + /* attribute missing from payload, */ \ + /* which was expected */ \ } else if ((attr_flag) & GENLA_F_REQUIRED) { \ pr_info("<< missing attr: %s\n", #name); \ return -ENOMSG; \ } +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla)) + /* validate_nla() already checked nla_len <= maxlen appropriately. */ #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ - nla = ntb[__nla_type(attr_nr)]; \ - if (nla) { \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name ## _len = \ __get(s->name, nla, maxlen); \ - DPRINT_ARRAY("<<", nla_type, name, s, nla); \ - } else if ((attr_flag) & GENLA_F_REQUIRED) { \ - pr_info("<< missing attr: %s\n", #name); \ - return -ENOMSG; \ - } \ + DPRINT_ARRAY("<<", nla_type, name, s, nla)) #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 745ebfd6c7e..9a605b9ee83 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -59,12 +59,20 @@ enum { GENLA_F_MANDATORY = 1 << 14, GENLA_F_REQUIRED = 1 << 15, - /* This will not be present in the __u16 .nla_type, but can be - * triggered on in _to_skb, to exclude "sensitive" - * information from broadcasts, or on unpriviledged get requests. - * This is useful because genetlink multicast groups can be listened in - * on by anyone. 
*/ + /* Below will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb resp. _from_attrs */ + + /* To exclude "sensitive" information from broadcasts, or on + * unpriviledged get requests. This is useful because genetlink + * multicast groups can be listened in on by anyone. */ GENLA_F_SENSITIVE = 1 << 16, + + /* INVARIAN options cannot be changed at runtime. + * Useful to share an attribute policy and struct definition, + * between some "create" and "change" commands, + * but disallow certain fields to be changed online. + */ + GENLA_F_INVARIANT = 1 << 17, }; #define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) From e90285e0ba11c1a4a09b8a50d8d5b18e1298d245 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Mar 2011 12:51:21 +0100 Subject: [PATCH 200/609] drbd: Fixed conn_lowest_minor It actually returned the lowest volume number. While doing that renamed a few wrongly named variables. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 12 +++++++----- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_state.c | 8 ++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bdb12723585..2dfbcfa1a7b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -434,7 +434,7 @@ void tl_clear(struct drbd_tconn *tconn) struct drbd_conf *mdev; struct list_head *le, *tle; struct drbd_request *r; - int minor; + int vnr; spin_lock_irq(&tconn->req_lock); @@ -453,7 +453,7 @@ void tl_clear(struct drbd_tconn *tconn) } /* ensure bit indicating barrier is required is clear */ - idr_for_each_entry(&tconn->volumes, mdev, minor) + idr_for_each_entry(&tconn->volumes, mdev, vnr) clear_bit(CREATE_BARRIER, &mdev->flags); spin_unlock_irq(&tconn->req_lock); @@ -634,11 +634,13 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas int 
conn_lowest_minor(struct drbd_tconn *tconn) { - int minor = 0; + int vnr = 0; + struct drbd_conf *mdev; - if (!idr_get_next(&tconn->volumes, &minor)) + mdev = idr_get_next(&tconn->volumes, &vnr); + if (!mdev) return -1; - return minor; + return mdev_to_minor(mdev); } #ifdef CONFIG_SMP diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 18cd2ed4e8c..d903fb5ea41 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -326,9 +326,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) static void conn_md_sync(struct drbd_tconn *tconn) { struct drbd_conf *mdev; - int minor; + int vnr; - idr_for_each_entry(&tconn->volumes, mdev, minor) + idr_for_each_entry(&tconn->volumes, mdev, vnr) drbd_md_sync(mdev); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 77fad527fb1..cb08e011c28 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -50,9 +50,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state bool conn_all_vols_unconf(struct drbd_tconn *tconn) { struct drbd_conf *mdev; - int minor; + int vnr; - idr_for_each_entry(&tconn->volumes, mdev, minor) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (mdev->state.disk != D_DISKLESS || mdev->state.conn != C_STANDALONE || mdev->state.role != R_SECONDARY) @@ -332,9 +332,9 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio static bool vol_has_primary_peer(struct drbd_tconn *tconn) { struct drbd_conf *mdev; - int minor; + int vnr; - idr_for_each_entry(&tconn->volumes, mdev, minor) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (mdev->state.peer == R_PRIMARY) return true; } From c0d42c8e5724e470873fa7491200c083600f5787 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 9 Dec 2010 23:52:22 +0100 Subject: [PATCH 201/609] drbd: drbd_send(): Return a "real" error code if we have no socket Q: Can this case even trigger? 
Is failing this way any better than one that causes a NULL pointer access? Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2dfbcfa1a7b..52378ccda74 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1703,7 +1703,7 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, int rv, sent = 0; if (!sock) - return -1000; + return -EBADR; /* THINK if (signal_pending) return ... ? */ From 11b0be28e57fabeb75edfe81a17eddfc484cd9df Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 15 Mar 2011 16:15:10 +0100 Subject: [PATCH 202/609] drbd: drbd_get_data_sock(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 17 ++++------------- drivers/block/drbd/drbd_main.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 2 +- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index bc265f3733c..daa4a74da10 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1115,24 +1115,15 @@ static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr) return (struct drbd_conf *)idr_find(&tconn->volumes, vnr); } -/* returns 1 if it was successful, - * returns 0 if there was no data socket. - * so wherever you are going to use the data.socket, e.g. do - * if (!drbd_get_data_sock(mdev->tconn)) - * return 0; - * CODE(); - * drbd_get_data_sock(mdev->tconn); - */ static inline int drbd_get_data_sock(struct drbd_tconn *tconn) { mutex_lock(&tconn->data.mutex); - /* drbd_disconnect() could have called drbd_free_sock() - * while we were waiting in down()... */ - if (unlikely(tconn->data.socket == NULL)) { + if (!tconn->data.socket) { + /* Disconnected. 
*/ mutex_unlock(&tconn->data.mutex); - return 0; + return -EIO; } - return 1; + return 0; } static inline void drbd_put_data_sock(struct drbd_tconn *tconn) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 52378ccda74..d4546d03824 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -773,7 +773,7 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, prepare_header80(&h, cmd, size); - if (!drbd_get_data_sock(tconn)) + if (drbd_get_data_sock(tconn)) return 0; ok = (sizeof(h) == @@ -1245,7 +1245,7 @@ int drbd_send_bitmap(struct drbd_conf *mdev) { int err; - if (!drbd_get_data_sock(mdev->tconn)) + if (drbd_get_data_sock(mdev->tconn)) return -1; err = !_drbd_send_bitmap(mdev); drbd_put_data_sock(mdev->tconn); @@ -1562,7 +1562,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) void *dgb; int dgs; - if (!drbd_get_data_sock(mdev->tconn)) + if (drbd_get_data_sock(mdev->tconn)) return 0; dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? @@ -1652,7 +1652,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL * in response to admin command or module unload. */ - if (!drbd_get_data_sock(mdev->tconn)) + if (drbd_get_data_sock(mdev->tconn)) return 0; ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 005876b32f7..39e49152e0b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1209,7 +1209,7 @@ int w_send_barrier(struct drbd_work *w, int cancel) if (cancel) return 1; - if (!drbd_get_data_sock(mdev->tconn)) + if (drbd_get_data_sock(mdev->tconn)) return 0; p->barrier = b->br_number; /* inc_ap_pending was done where this was queued. 
From fb708e408f0d75953114be66bdbeb9065ca09a33 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 15 Dec 2010 17:04:36 +0100 Subject: [PATCH 203/609] drbd: Add drbd_send_all(): Send an entire buffer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 +++ drivers/block/drbd/drbd_main.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index daa4a74da10..0bd8c2a800c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1161,6 +1161,9 @@ extern void _tl_add_barrier(struct drbd_tconn *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_tconn *tconn); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); +extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t, + unsigned); + extern int drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d4546d03824..7f2afb8115c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1764,6 +1764,24 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, return sent; } +/** + * drbd_send_all - Send an entire buffer + * + * Returns 0 upon success and a negative error value otherwise. 
+ */ +int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer, + size_t size, unsigned msg_flags) +{ + int err; + + err = drbd_send(tconn, sock, buffer, size, msg_flags); + if (err < 0) + return err; + if (err != size) + return -EIO; + return 0; +} + static int drbd_open(struct block_device *bdev, fmode_t mode) { struct drbd_conf *mdev = bdev->bd_disk->private_data; From ce9879cb1f992448e10549525c8c46f9ca770a5b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 15 Mar 2011 23:34:29 +0100 Subject: [PATCH 204/609] drbd: conn_send_cmd2(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 24 ++++++++++-------------- drivers/block/drbd/drbd_receiver.c | 4 ++-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7f2afb8115c..018c336aea0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -769,21 +769,17 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size) { struct p_header80 h; - int ok; + int err; prepare_header80(&h, cmd, size); - - if (drbd_get_data_sock(tconn)) - return 0; - - ok = (sizeof(h) == - drbd_send(tconn, tconn->data.socket, &h, sizeof(h), 0)); - ok = ok && (size == - drbd_send(tconn, tconn->data.socket, data, size, 0)); - - drbd_put_data_sock(tconn); - - return ok; + err = drbd_get_data_sock(tconn); + if (!err) { + err = drbd_send_all(tconn, tconn->data.socket, &h, sizeof(h), 0); + if (!err) + err = drbd_send_all(tconn, tconn->data.socket, data, size, 0); + drbd_put_data_sock(tconn); + } + return err; } int drbd_send_sync_param(struct drbd_conf *mdev) @@ -882,7 +878,7 @@ int drbd_send_protocol(struct drbd_tconn *tconn) if (tconn->agreed_pro_version >= 87) strcpy(p->integrity_alg, tconn->net_conf->integrity_alg); - rv = conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - 
sizeof(struct p_header)); + rv = !conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header)); kfree(p); return rv; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c8c826b2444..9c781ff1aa8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4265,7 +4265,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + rv = !conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; @@ -4320,7 +4320,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) goto fail; } - rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size); + rv = !conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size); if (!rv) goto fail; From ecf2363cb590eb80977ac1d8259d9fa9a6250145 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 15 Mar 2011 23:48:25 +0100 Subject: [PATCH 205/609] drbd: _conn_send_cmd(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 17 +++++++---------- drivers/block/drbd/drbd_receiver.c | 6 +++--- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0bd8c2a800c..1f17d837db4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1926,7 +1926,7 @@ static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags) { - return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); + return !_conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); } static inline int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, diff --git 
a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 018c336aea0..ff0085b98be 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -723,17 +723,14 @@ int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags) { - int sent, ok; + int err; _prepare_header(tconn, vnr, h, cmd, size - sizeof(struct p_header)); - - sent = drbd_send(tconn, sock, h, size, msg_flags); - - ok = (sent == size); - if (!ok && !signal_pending(current)) - conn_warn(tconn, "short sent %s size=%d sent=%d\n", - cmdname(cmd), (int)size, sent); - return ok; + err = drbd_send_all(tconn, sock, h, size, msg_flags); + if (err && !signal_pending(current)) + conn_warn(tconn, "short send %s size=%d\n", + cmdname(cmd), (int)size); + return err; } /* don't pass the socket. we may only look at it @@ -756,7 +753,7 @@ int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()... 
*/ if (likely(sock != NULL)) - ok = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); + ok = !_conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); if (use_data_socket) mutex_unlock(&tconn->data.mutex); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9c781ff1aa8..d21c2ffe6c6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -711,7 +711,7 @@ static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd { struct p_header *h = &tconn->data.sbuf.header; - return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); + return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); } static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock) @@ -4150,8 +4150,8 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE, - &p->head, sizeof(*p), 0); + ok = !_conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE, + &p->head, sizeof(*p), 0); mutex_unlock(&tconn->data.mutex); return ok; } From 04dfa137881efc890544c5cd3af94e54cfe0c480 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 15 Mar 2011 23:51:21 +0100 Subject: [PATCH 206/609] drbd: _drbd_send_cmd(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 15 +++++++-------- drivers/block/drbd/drbd_worker.c | 4 ++-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1f17d837db4..0bd8c2a800c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1926,7 +1926,7 @@ static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, enum drbd_packet cmd, 
struct p_header *h, size_t size, unsigned msg_flags) { - return !_conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); + return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); } static inline int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ff0085b98be..2177915f04a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -827,7 +827,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev) if (apv >= 89) strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); - rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); + rv = !_drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); } else rv = 0; /* not ok */ @@ -995,9 +995,8 @@ int drbd_send_state(struct drbd_conf *mdev) p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ sock = mdev->tconn->data.socket; - if (likely(sock != NULL)) { - ok = _drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); - } + if (likely(sock != NULL)) + ok = !_drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); mutex_unlock(&mdev->tconn->data.mutex); @@ -1150,8 +1149,8 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (len) { DCBP_set_code(p, RLE_VLI_Bits); - ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h, - sizeof(*p) + len, 0); + ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h, + sizeof(*p) + len, 0); c->packets[0]++; c->bytes[0] += sizeof(*p) + len; @@ -1165,8 +1164,8 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, len = num_words * sizeof(long); if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); - ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, - h, sizeof(struct p_header80) + len, 0); + ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, + h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; c->bit_offset = 
c->word_offset * BITS_PER_LONG; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 39e49152e0b..d7263095cc1 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1215,8 +1215,8 @@ int w_send_barrier(struct drbd_work *w, int cancel) /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. */ - ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, - &p->head, sizeof(*p), 0); + ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, + &p->head, sizeof(*p), 0); drbd_put_data_sock(mdev->tconn); return ok; From 596a37f9eff332fa034cd6bea0c0ee2e09c7d900 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 15 Mar 2011 23:55:59 +0100 Subject: [PATCH 207/609] drbd: conn_send_cmd(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0bd8c2a800c..4efb5358a61 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1932,7 +1932,7 @@ static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, static inline int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, enum drbd_packet cmd, struct p_header *h, size_t size) { - return conn_send_cmd(mdev->tconn, mdev->vnr, use_data_socket, cmd, h, size); + return !conn_send_cmd(mdev->tconn, mdev->vnr, use_data_socket, cmd, h, size); } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, @@ -1945,13 +1945,13 @@ static inline int drbd_send_short_cmd(struct drbd_conf *mdev, static inline int drbd_send_ping(struct drbd_tconn *tconn) { struct p_header h; - return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING, &h, sizeof(h)); + 
return !conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING, &h, sizeof(h)); } static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) { struct p_header h; - return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); + return !conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } static inline int drbd_send_state_req(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2177915f04a..1fcbaa178b4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -739,8 +739,8 @@ int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, enum drbd_packet cmd, struct p_header *h, size_t size) { - int ok = 0; struct socket *sock; + int err = -EIO; if (use_data_socket) { mutex_lock(&tconn->data.mutex); @@ -753,13 +753,13 @@ int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()... 
*/ if (likely(sock != NULL)) - ok = !_conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); + err = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); if (use_data_socket) mutex_unlock(&tconn->data.mutex); else mutex_unlock(&tconn->meta.mutex); - return ok; + return err; } int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, @@ -1011,7 +1011,7 @@ int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return conn_send_cmd(tconn, vnr, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); + return !conn_send_cmd(tconn, vnr, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) @@ -1030,7 +1030,7 @@ int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) p.retcode = cpu_to_be32(retcode); - return conn_send_cmd(tconn, 0, USE_META_SOCKET, cmd, &p.head, sizeof(p)); + return !conn_send_cmd(tconn, 0, USE_META_SOCKET, cmd, &p.head, sizeof(p)); } int fill_bitmap_rle_bits(struct drbd_conf *mdev, From 7d168ed30fcc59767df275e464712d5331bd07da Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 00:11:25 +0100 Subject: [PATCH 208/609] drbd: Get rid of USE_DATA_SOCKET and USE_META_SOCKET Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 14 +++++------ drivers/block/drbd/drbd_main.c | 46 ++++++++++++---------------------- 2 files changed, 22 insertions(+), 38 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4efb5358a61..556e785ebfa 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1176,12 +1176,10 @@ extern int drbd_send_state(struct drbd_conf *mdev); extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags); -extern int conn_send_cmd(struct drbd_tconn *tconn, 
int vnr, int use_data_socket, +extern int conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size); extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); -#define USE_DATA_SOCKET 1 -#define USE_META_SOCKET 0 extern int drbd_send_sync_param(struct drbd_conf *mdev); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); @@ -1929,29 +1927,29 @@ static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); } -static inline int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, +static inline int drbd_send_cmd(struct drbd_conf *mdev, struct drbd_socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size) { - return !conn_send_cmd(mdev->tconn, mdev->vnr, use_data_socket, cmd, h, size); + return !conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size); } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_header h; - return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); + return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_tconn *tconn) { struct p_header h; - return !conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING, &h, sizeof(h)); + return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING, &h, sizeof(h)); } static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) { struct p_header h; - return !conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); + return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING_ACK, &h, sizeof(h)); } static inline int drbd_send_state_req(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1fcbaa178b4..3a9bafbd416 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -736,29 
+736,15 @@ int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, /* don't pass the socket. we may only look at it * when we hold the appropriate socket mutex. */ -int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, +int conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size) { - struct socket *sock; int err = -EIO; - if (use_data_socket) { - mutex_lock(&tconn->data.mutex); - sock = tconn->data.socket; - } else { - mutex_lock(&tconn->meta.mutex); - sock = tconn->meta.socket; - } - - /* drbd_disconnect() could have called drbd_free_sock() - * while we were waiting in down()... */ - if (likely(sock != NULL)) - err = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); - - if (use_data_socket) - mutex_unlock(&tconn->data.mutex); - else - mutex_unlock(&tconn->meta.mutex); + mutex_lock(&sock->mutex); + if (sock->socket) + err = _conn_send_cmd(tconn, vnr, sock->socket, cmd, h, size, 0); + mutex_unlock(&sock->mutex); return err; } @@ -900,7 +886,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, &p.head, sizeof(p)); + return drbd_send_cmd(mdev, &mdev->tconn->data, P_UUIDS, &p.head, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -944,7 +930,7 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) drbd_md_sync(mdev); p.uuid = cpu_to_be64(uuid); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, &p.head, sizeof(p)); + return drbd_send_cmd(mdev, &mdev->tconn->data, P_SYNC_UUID, &p.head, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) @@ -976,7 +962,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, &p.head, sizeof(p)); + ok = 
drbd_send_cmd(mdev, &mdev->tconn->data, P_SIZES, &p.head, sizeof(p)); return ok; } @@ -1011,7 +997,7 @@ int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return !conn_send_cmd(tconn, vnr, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); + return !conn_send_cmd(tconn, vnr, &tconn->data, cmd, &p.head, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) @@ -1020,7 +1006,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) p.retcode = cpu_to_be32(retcode); - return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, &p.head, sizeof(p)); + return drbd_send_cmd(mdev, &mdev->tconn->meta, P_STATE_CHG_REPLY, &p.head, sizeof(p)); } int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) @@ -1030,7 +1016,7 @@ int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) p.retcode = cpu_to_be32(retcode); - return !conn_send_cmd(tconn, 0, USE_META_SOCKET, cmd, &p.head, sizeof(p)); + return !conn_send_cmd(tconn, 0, &tconn->meta, cmd, &p.head, sizeof(p)); } int fill_bitmap_rle_bits(struct drbd_conf *mdev, @@ -1254,7 +1240,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) if (mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, &p.head, sizeof(p)); + ok = drbd_send_cmd(mdev, &mdev->tconn->meta, P_BARRIER_ACK, &p.head, sizeof(p)); return ok; } @@ -1279,7 +1265,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, &p.head, sizeof(p)); + ok = drbd_send_cmd(mdev, &mdev->tconn->meta, cmd, &p.head, sizeof(p)); return ok; } @@ -1337,7 +1323,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, p.block_id = block_id; p.blksize = cpu_to_be32(size); - ok = 
drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); + ok = drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &p.head, sizeof(p)); return ok; } @@ -1371,7 +1357,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, &p.head, sizeof(p)); + ok = drbd_send_cmd(mdev, &mdev->tconn->data, P_OV_REQUEST, &p.head, sizeof(p)); return ok; } @@ -1668,7 +1654,7 @@ int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) p.sector = cpu_to_be64(req->i.sector); p.blksize = cpu_to_be32(req->i.size); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p)); + return drbd_send_cmd(mdev, &mdev->tconn->data, P_OUT_OF_SYNC, &p.head, sizeof(p)); } /* From f725446353ab88ac1ec69bf1312ed83ce7b8a413 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 00:38:58 +0100 Subject: [PATCH 209/609] drbd: drbd_send_cmd(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 556e785ebfa..99b84b67095 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1930,14 +1930,14 @@ static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, static inline int drbd_send_cmd(struct drbd_conf *mdev, struct drbd_socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size) { - return !conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size); + return conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size); } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_header h; - return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, 
sizeof(h)); + return !drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_tconn *tconn) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 3a9bafbd416..e41fffbfd67 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -886,7 +886,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); - return drbd_send_cmd(mdev, &mdev->tconn->data, P_UUIDS, &p.head, sizeof(p)); + return !drbd_send_cmd(mdev, &mdev->tconn->data, P_UUIDS, &p.head, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -930,7 +930,7 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) drbd_md_sync(mdev); p.uuid = cpu_to_be64(uuid); - return drbd_send_cmd(mdev, &mdev->tconn->data, P_SYNC_UUID, &p.head, sizeof(p)); + return !drbd_send_cmd(mdev, &mdev->tconn->data, P_SYNC_UUID, &p.head, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) @@ -962,7 +962,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); - ok = drbd_send_cmd(mdev, &mdev->tconn->data, P_SIZES, &p.head, sizeof(p)); + ok = !drbd_send_cmd(mdev, &mdev->tconn->data, P_SIZES, &p.head, sizeof(p)); return ok; } @@ -1006,7 +1006,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) p.retcode = cpu_to_be32(retcode); - return drbd_send_cmd(mdev, &mdev->tconn->meta, P_STATE_CHG_REPLY, &p.head, sizeof(p)); + return !drbd_send_cmd(mdev, &mdev->tconn->meta, P_STATE_CHG_REPLY, &p.head, sizeof(p)); } int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) @@ -1240,7 +1240,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) if (mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, &mdev->tconn->meta, P_BARRIER_ACK, &p.head, sizeof(p)); + ok = 
!drbd_send_cmd(mdev, &mdev->tconn->meta, P_BARRIER_ACK, &p.head, sizeof(p)); return ok; } @@ -1265,7 +1265,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, &mdev->tconn->meta, cmd, &p.head, sizeof(p)); + ok = !drbd_send_cmd(mdev, &mdev->tconn->meta, cmd, &p.head, sizeof(p)); return ok; } @@ -1323,7 +1323,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, p.block_id = block_id; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &p.head, sizeof(p)); + ok = !drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &p.head, sizeof(p)); return ok; } @@ -1357,7 +1357,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, &mdev->tconn->data, P_OV_REQUEST, &p.head, sizeof(p)); + ok = !drbd_send_cmd(mdev, &mdev->tconn->data, P_OV_REQUEST, &p.head, sizeof(p)); return ok; } @@ -1654,7 +1654,7 @@ int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) p.sector = cpu_to_be64(req->i.sector); p.blksize = cpu_to_be32(req->i.size); - return drbd_send_cmd(mdev, &mdev->tconn->data, P_OUT_OF_SYNC, &p.head, sizeof(p)); + return !drbd_send_cmd(mdev, &mdev->tconn->data, P_OUT_OF_SYNC, &p.head, sizeof(p)); } /* From 103ea275285970568cbc07ce01063aa69bac5577 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 00:43:02 +0100 Subject: [PATCH 210/609] drbd: drbd_send_sync_param(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 8 ++++---- drivers/block/drbd/drbd_receiver.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e41fffbfd67..80cfd302071 100644 --- a/drivers/block/drbd/drbd_main.c +++ 
b/drivers/block/drbd/drbd_main.c @@ -769,7 +769,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev) { struct p_rs_param_95 *p; struct socket *sock; - int size, rv; + int size, err; const int apv = mdev->tconn->agreed_pro_version; size = apv <= 87 ? sizeof(struct p_rs_param) @@ -813,13 +813,13 @@ int drbd_send_sync_param(struct drbd_conf *mdev) if (apv >= 89) strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); - rv = !_drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); + err = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); } else - rv = 0; /* not ok */ + err = -EIO; mutex_unlock(&mdev->tconn->data.mutex); - return rv; + return err; } int drbd_send_protocol(struct drbd_tconn *tconn) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d21c2ffe6c6..c35be86b121 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -763,7 +763,7 @@ int drbd_connected(int vnr, void *p, void *data) &mdev->tconn->cstate_mutex : &mdev->own_state_mutex; - ok &= drbd_send_sync_param(mdev); + ok &= !drbd_send_sync_param(mdev); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); ok &= drbd_send_state(mdev); From 927036f9089d9c776217656960b2352971cc2ea0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 00:50:00 +0100 Subject: [PATCH 211/609] drbd: drbd_send_state(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_state.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 80cfd302071..8d5ed6f5241 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -974,7 +974,7 @@ int drbd_send_state(struct drbd_conf *mdev) { struct socket *sock; struct p_state p; - int ok = 0; + int err = -EIO; 
mutex_lock(&mdev->tconn->data.mutex); @@ -982,11 +982,11 @@ int drbd_send_state(struct drbd_conf *mdev) sock = mdev->tconn->data.socket; if (likely(sock != NULL)) - ok = !_drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); + err = _drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); mutex_unlock(&mdev->tconn->data.mutex); - return ok; + return err; } int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c35be86b121..fb907377402 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -766,7 +766,7 @@ int drbd_connected(int vnr, void *p, void *data) ok &= !drbd_send_sync_param(mdev); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); - ok &= drbd_send_state(mdev); + ok &= !drbd_send_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index cb08e011c28..29372dbad27 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1188,7 +1188,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, "ASSERT FAILED: disk is %s during detach\n", drbd_disk_str(mdev->state.disk)); - if (drbd_send_state(mdev)) + if (!drbd_send_state(mdev)) dev_warn(DEV, "Notified peer that I am detaching my disk\n"); else dev_err(DEV, "Sending state for detaching disk failed\n"); @@ -1220,7 +1220,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->rs_failed = 0; atomic_set(&mdev->rs_pending_cnt, 0); - if (drbd_send_state(mdev)) + if (!drbd_send_state(mdev)) dev_warn(DEV, "Notified peer that I'm now diskless.\n"); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. 
*/ From e8d17b015eb8103a4dd5d547baa158c30fa18220 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 00:54:19 +0100 Subject: [PATCH 212/609] drbd: drbd_send_handshake(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fb907377402..470c7ed3258 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4135,25 +4135,25 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) { /* ASSERT current == mdev->tconn->receiver ... */ struct p_handshake *p = &tconn->data.sbuf.handshake; - int ok; + int err; if (mutex_lock_interruptible(&tconn->data.mutex)) { conn_err(tconn, "interrupted during initial handshake\n"); - return 0; /* interrupted. not ok. */ + return -EINTR; } if (tconn->data.socket == NULL) { mutex_unlock(&tconn->data.mutex); - return 0; + return -EIO; } memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = !_conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE, + err = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE, &p->head, sizeof(*p), 0); mutex_unlock(&tconn->data.mutex); - return ok; + return err; } /* @@ -4169,10 +4169,10 @@ static int drbd_do_handshake(struct drbd_tconn *tconn) struct p_handshake *p = &tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); struct packet_info pi; - int rv; + int err, rv; - rv = drbd_send_handshake(tconn); - if (!rv) + err = drbd_send_handshake(tconn); + if (err) return 0; rv = drbd_recv_header(tconn, &pi); From 387eb30817f9009467a90cdbb3ab4dd0dc02a92f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:05:37 +0100 Subject: [PATCH 213/609] drbd: 
drbd_send_protocol(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 10 +++++----- drivers/block/drbd/drbd_receiver.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8d5ed6f5241..29b993b9b8d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -825,7 +825,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev) int drbd_send_protocol(struct drbd_tconn *tconn) { struct p_protocol *p; - int size, cf, rv; + int size, cf, err; size = sizeof(struct p_protocol); @@ -836,7 +836,7 @@ int drbd_send_protocol(struct drbd_tconn *tconn) * as that is blocked during handshake */ p = kmalloc(size, GFP_NOIO); if (p == NULL) - return 0; + return -ENOMEM; p->protocol = cpu_to_be32(tconn->net_conf->wire_protocol); p->after_sb_0p = cpu_to_be32(tconn->net_conf->after_sb_0p); @@ -853,7 +853,7 @@ int drbd_send_protocol(struct drbd_tconn *tconn) else { conn_err(tconn, "--dry-run is not supported by peer"); kfree(p); - return -1; + return -EOPNOTSUPP; } } p->conn_flags = cpu_to_be32(cf); @@ -861,9 +861,9 @@ int drbd_send_protocol(struct drbd_tconn *tconn) if (tconn->agreed_pro_version >= 87) strcpy(p->integrity_alg, tconn->net_conf->integrity_alg); - rv = !conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header)); + err = conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header)); kfree(p); - return rv; + return err; } int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 470c7ed3258..222fca535e9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -931,7 +931,7 @@ retry: drbd_thread_start(&tconn->asender); - if (drbd_send_protocol(tconn) == -1) + if (drbd_send_protocol(tconn) == -EOPNOTSUPP) 
return -1; return !idr_for_each(&tconn->volumes, drbd_connected, tconn); From 2ae5f95b1a3b6d60d9ce971e7f2c8ef84c306538 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:07:20 +0100 Subject: [PATCH 214/609] drbd: drbd_send_uuids() and its variants: Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 29b993b9b8d..95586870001 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -872,7 +872,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) int i; if (!get_ldev_if_state(mdev, D_NEGOTIATING)) - return 1; + return 0; for (i = UI_CURRENT; i < UI_SIZE; i++) p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; @@ -886,7 +886,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); - return !drbd_send_cmd(mdev, &mdev->tconn->data, P_UUIDS, &p.head, sizeof(p)); + return drbd_send_cmd(mdev, &mdev->tconn->data, P_UUIDS, &p.head, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 222fca535e9..1403e4b18a2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -765,7 +765,7 @@ int drbd_connected(int vnr, void *p, void *data) ok &= !drbd_send_sync_param(mdev); ok &= drbd_send_sizes(mdev, 0, 0); - ok &= drbd_send_uuids(mdev); + ok &= !drbd_send_uuids(mdev); ok &= !drbd_send_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); From 9c1b7f7282d9f6a8623706c0c361652105f3e7ad Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:09:01 +0100 Subject: [PATCH 215/609] drbd: drbd_gen_and_send_sync_uuid(): Return 
void: the result is never used Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 99b84b67095..dacc63f2dfe 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1167,7 +1167,7 @@ extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t, extern int drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); -extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); +extern void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); extern int _conn_send_state_req(struct drbd_tconn *, int vnr, enum drbd_packet cmd, union drbd_state, union drbd_state); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 95586870001..7ed2e2ebb89 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -917,7 +917,7 @@ void drbd_print_uuids(struct drbd_conf *mdev, const char *text) } } -int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) +void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) { struct p_rs_uuid p; u64 uuid; @@ -930,7 +930,7 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) drbd_md_sync(mdev); p.uuid = cpu_to_be64(uuid); - return !drbd_send_cmd(mdev, &mdev->tconn->data, P_SYNC_UUID, &p.head, sizeof(p)); + drbd_send_cmd(mdev, &mdev->tconn->data, P_SYNC_UUID, &p.head, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) From f02d4d0a9c3d3b8aef5a3b1016a3f69b0ae9496e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:12:50 +0100 Subject: [PATCH 216/609] drbd: 
drbd_send_sizes(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 +--- drivers/block/drbd/drbd_receiver.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7ed2e2ebb89..8e3607a2ae0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -938,7 +938,6 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl struct p_sizes p; sector_t d_size, u_size; int q_order_type, max_bio_size; - int ok; if (get_ldev_if_state(mdev, D_NEGOTIATING)) { D_ASSERT(mdev->ldev->backing_bdev); @@ -962,8 +961,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); - ok = !drbd_send_cmd(mdev, &mdev->tconn->data, P_SIZES, &p.head, sizeof(p)); - return ok; + return drbd_send_cmd(mdev, &mdev->tconn->data, P_SIZES, &p.head, sizeof(p)); } /** diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1403e4b18a2..30a9559918a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -764,7 +764,7 @@ int drbd_connected(int vnr, void *p, void *data) &mdev->own_state_mutex; ok &= !drbd_send_sync_param(mdev); - ok &= drbd_send_sizes(mdev, 0, 0); + ok &= !drbd_send_sizes(mdev, 0, 0); ok &= !drbd_send_uuids(mdev); ok &= !drbd_send_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); From 758970c832a8e44f887c69fe481ea56ca44f08a6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:16:25 +0100 Subject: [PATCH 217/609] drbd: _conn_send_state_req(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dacc63f2dfe..aa426185fe9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1955,14 +1955,14 @@ static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) static inline int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) { - return _conn_send_state_req(mdev->tconn, mdev->vnr, P_STATE_CHG_REQ, mask, val); + return !_conn_send_state_req(mdev->tconn, mdev->vnr, P_STATE_CHG_REQ, mask, val); } static inline int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) { enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; - return _conn_send_state_req(tconn, 0, cmd, mask, val); + return !_conn_send_state_req(tconn, 0, cmd, mask, val); } static inline void drbd_thread_stop(struct drbd_thread *thi) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8e3607a2ae0..69957ba8f9f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -995,7 +995,7 @@ int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return !conn_send_cmd(tconn, vnr, &tconn->data, cmd, &p.head, sizeof(p)); + return conn_send_cmd(tconn, vnr, &tconn->data, cmd, &p.head, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) From caee1c3a92246628fa323429db98a9b1a8a758a5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:17:35 +0100 Subject: [PATCH 218/609] drbd: conn_send_state_req(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index aa426185fe9..1cc8dd759f1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1962,7 +1962,7 @@ static inline int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) { enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; - return !_conn_send_state_req(tconn, 0, cmd, mask, val); + return _conn_send_state_req(tconn, 0, cmd, mask, val); } static inline void drbd_thread_stop(struct drbd_thread *thi) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 29372dbad27..a33dda1e7bc 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1438,7 +1438,7 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v spin_unlock_irq(&tconn->req_lock); mutex_lock(&tconn->cstate_mutex); - if (!conn_send_state_req(tconn, mask, val)) { + if (conn_send_state_req(tconn, mask, val)) { rv = SS_CW_FAILED_BY_PEER; /* if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); */ From d24ae219e96b7903c712e527cde1844bddbb1311 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:19:27 +0100 Subject: [PATCH 219/609] drbd: drbd_send_state_req(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1cc8dd759f1..dacc63f2dfe 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1955,7 +1955,7 @@ static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) static inline int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) { - return !_conn_send_state_req(mdev->tconn, mdev->vnr, 
P_STATE_CHG_REQ, mask, val); + return _conn_send_state_req(mdev->tconn, mdev->vnr, P_STATE_CHG_REQ, mask, val); } static inline int conn_send_state_req(struct drbd_tconn *tconn, diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a33dda1e7bc..1c681110580 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -197,7 +197,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } - if (!drbd_send_state_req(mdev, mask, val)) { + if (drbd_send_state_req(mdev, mask, val)) { rv = SS_CW_FAILED_BY_PEER; if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); From 2f4e7abe5175cd17d74e41b9f4e8e04cd73342fb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:20:38 +0100 Subject: [PATCH 220/609] drbd: drbd_send_sr_reply(): Return void: the result is never used Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dacc63f2dfe..b82ef880913 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1204,7 +1204,7 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) extern int drbd_send_bitmap(struct drbd_conf *mdev); extern int _drbd_send_bitmap(struct drbd_conf *mdev); -extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); +extern void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); extern int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 69957ba8f9f..c91450260d8 100644 --- a/drivers/block/drbd/drbd_main.c +++ 
b/drivers/block/drbd/drbd_main.c @@ -998,13 +998,13 @@ int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd return conn_send_cmd(tconn, vnr, &tconn->data, cmd, &p.head, sizeof(p)); } -int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) +void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) { struct p_req_state_reply p; p.retcode = cpu_to_be32(retcode); - return !drbd_send_cmd(mdev, &mdev->tconn->meta, P_STATE_CHG_REPLY, &p.head, sizeof(p)); + drbd_send_cmd(mdev, &mdev->tconn->meta, P_STATE_CHG_REPLY, &p.head, sizeof(p)); } int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) From d4e67d7c4fa7989175910faac41ab6de77399cf6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:25:28 +0100 Subject: [PATCH 221/609] drbd: drbd_send_b_ack(): Return void: the result is never used Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 10 +++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b82ef880913..354c9ddb09d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1181,8 +1181,8 @@ extern int conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket * extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev); -extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, - u32 set_size); +extern void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, + u32 set_size); extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, struct drbd_peer_request *); extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c91450260d8..5bff744897f 100644 
--- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1227,19 +1227,15 @@ int drbd_send_bitmap(struct drbd_conf *mdev) drbd_put_data_sock(mdev->tconn); return err; } - -int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) +void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) { - int ok; struct p_barrier_ack p; p.barrier = barrier_nr; p.set_size = cpu_to_be32(set_size); - if (mdev->state.conn < C_CONNECTED) - return false; - ok = !drbd_send_cmd(mdev, &mdev->tconn->meta, P_BARRIER_ACK, &p.head, sizeof(p)); - return ok; + if (mdev->state.conn >= C_CONNECTED) + drbd_send_cmd(mdev, &mdev->tconn->meta, P_BARRIER_ACK, &p.head, sizeof(p)); } /** From a8c32aa846cfa271744f1c11fcf425b131137991 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:27:22 +0100 Subject: [PATCH 222/609] drbd: _drbd_send_ack(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5bff744897f..c80ba373bcd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1249,7 +1249,6 @@ void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, u64 sector, u32 blksize, u64 block_id) { - int ok; struct p_block_ack p; p.sector = sector; @@ -1258,9 +1257,8 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, p.seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) - return false; - ok = !drbd_send_cmd(mdev, &mdev->tconn->meta, cmd, &p.head, sizeof(p)); - return ok; + return -EIO; + return drbd_send_cmd(mdev, &mdev->tconn->meta, cmd, &p.head, sizeof(p)); } /* 
dp->sector and dp->block_id already/still in network byte order, @@ -1271,14 +1269,14 @@ int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, { data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; - return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), - dp->block_id); + return !_drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), + dp->block_id); } int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_block_req *rp) { - return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); + return !_drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); } /** @@ -1290,10 +1288,10 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_peer_request *peer_req) { - return _drbd_send_ack(mdev, cmd, - cpu_to_be64(peer_req->i.sector), - cpu_to_be32(peer_req->i.size), - peer_req->block_id); + return !_drbd_send_ack(mdev, cmd, + cpu_to_be64(peer_req->i.sector), + cpu_to_be32(peer_req->i.size), + peer_req->block_id); } /* This function misuses the block_id field to signal if the blocks @@ -1301,10 +1299,10 @@ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id) { - return _drbd_send_ack(mdev, cmd, - cpu_to_be64(sector), - cpu_to_be32(blksize), - cpu_to_be64(block_id)); + return !_drbd_send_ack(mdev, cmd, + cpu_to_be64(sector), + cpu_to_be32(blksize), + cpu_to_be64(block_id)); } int drbd_send_drequest(struct drbd_conf *mdev, int cmd, From dd5161218bc514a29e1d8670fe1f3753d5e0f813 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 15:39:08 +0100 Subject: [PATCH 223/609] drbd: drbd_send_ack(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 8 ++++---- drivers/block/drbd/drbd_receiver.c | 10 +++++----- drivers/block/drbd/drbd_worker.c | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c80ba373bcd..bedfc1f62c7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1288,10 +1288,10 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_peer_request *peer_req) { - return !_drbd_send_ack(mdev, cmd, - cpu_to_be64(peer_req->i.sector), - cpu_to_be32(peer_req->i.size), - peer_req->block_id); + return _drbd_send_ack(mdev, cmd, + cpu_to_be64(peer_req->i.sector), + cpu_to_be32(peer_req->i.size), + peer_req->block_id); } /* This function misuses the block_id field to signal if the blocks diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 30a9559918a..9cd3d0d3ae4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1473,12 +1473,12 @@ static int e_end_resync_block(struct drbd_work *w, int unused) if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, peer_req->i.size); - ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); + ok = !drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ drbd_rs_failed_io(mdev, sector, peer_req->i.size); - ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req); + ok = !drbd_send_ack(mdev, P_NEG_ACK, peer_req); } dec_unacked(mdev); @@ -1659,11 +1659,11 @@ static int e_end_block(struct drbd_work *w, int cancel) mdev->state.conn <= C_PAUSED_SYNC_T && peer_req->flags & EE_MAY_SET_IN_SYNC) ? 
P_RS_WRITE_ACK : P_WRITE_ACK; - ok &= drbd_send_ack(mdev, pcmd, peer_req); + ok &= !drbd_send_ack(mdev, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) drbd_set_in_sync(mdev, sector, peer_req->i.size); } else { - ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req); + ok = !drbd_send_ack(mdev, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... * maybe assert this? */ } @@ -1693,7 +1693,7 @@ static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) container_of(w, struct drbd_peer_request, w); int ok; - ok = drbd_send_ack(mdev, ack, peer_req); + ok = !drbd_send_ack(mdev, ack, peer_req); dec_unacked(mdev); return ok; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d7263095cc1..69db6de984c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -926,7 +926,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) dev_err(DEV, "Sending NegDReply. sector=%llus.\n", (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); + ok = !drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); } dec_unacked(mdev); @@ -962,7 +962,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) } if (mdev->state.conn == C_AHEAD) { - ok = drbd_send_ack(mdev, P_RS_CANCEL, peer_req); + ok = !drbd_send_ack(mdev, P_RS_CANCEL, peer_req); } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(mdev); @@ -978,7 +978,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) dev_err(DEV, "Sending NegRSDReply. 
sector %llus.\n", (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); + ok = !drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size); @@ -1034,7 +1034,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; - ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); + ok = !drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); } else { inc_rs_pending(mdev); peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ @@ -1043,7 +1043,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) ok = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } } else { - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); + ok = !drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. 
I guess it gets messy.\n"); } From a9a9994dc70f388e91cf4bcc971843084f9af44f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:30:14 +0100 Subject: [PATCH 224/609] drbd: drbd_send_ack_{dp,rp}(): Return void: the result is never used Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++---- drivers/block/drbd/drbd_main.c | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 354c9ddb09d..bcb3bc25a72 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1185,10 +1185,10 @@ extern void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, struct drbd_peer_request *); -extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, - struct p_block_req *rp); -extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, - struct p_data *dp, int data_size); +extern void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_block_req *rp); +extern void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_data *dp, int data_size); extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id); extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bedfc1f62c7..92b24d08395 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1264,19 +1264,19 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, /* dp->sector and dp->block_id already/still in network byte order, * data_size is payload size according to dp->head, * and may need to be corrected for digest size. 
*/ -int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, - struct p_data *dp, int data_size) +void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_data *dp, int data_size) { data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; - return !_drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), - dp->block_id); + _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), + dp->block_id); } -int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, - struct p_block_req *rp) +void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_block_req *rp) { - return !_drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); + _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); } /** From fa79abd893f21f458c74af8bca015aa2ef7486a5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:31:39 +0100 Subject: [PATCH 225/609] drbd: drbd_send_ack_ex(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 92b24d08395..47771ab8257 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1299,10 +1299,10 @@ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id) { - return !_drbd_send_ack(mdev, cmd, - cpu_to_be64(sector), - cpu_to_be32(blksize), - cpu_to_be64(block_id)); + return _drbd_send_ack(mdev, cmd, + cpu_to_be64(sector), + cpu_to_be32(blksize), + cpu_to_be64(block_id)); } int drbd_send_drequest(struct drbd_conf *mdev, int cmd, diff --git 
a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 69db6de984c..9b740864b51 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1163,8 +1163,8 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) else ov_oos_print(mdev); - ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, - eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); + ok = !drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, + eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); dec_unacked(mdev); From 5b9f499c664efc1a72a0fe2538b39db7e75ecd2b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:31:39 +0100 Subject: [PATCH 226/609] drbd: drbd_send_ov_request(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 +--- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 47771ab8257..200791e44aa 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1342,15 +1342,13 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) { - int ok; struct p_block_req p; p.sector = cpu_to_be64(sector); p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - ok = !drbd_send_cmd(mdev, &mdev->tconn->data, P_OV_REQUEST, &p.head, sizeof(p)); - return ok; + return drbd_send_cmd(mdev, &mdev->tconn->data, P_OV_REQUEST, &p.head, sizeof(p)); } /* called on sndtimeo diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9b740864b51..216479641a7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -695,7 +695,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) size = (capacity-sector)<<9; inc_rs_pending(mdev); - if (!drbd_send_ov_request(mdev, 
sector, size)) { + if (drbd_send_ov_request(mdev, sector, size)) { dec_rs_pending(mdev); return 0; } From 6c1005e74d4142511a165edae72cb6648aa308c5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:34:24 +0100 Subject: [PATCH 227/609] drbd: drbd_send_drequest(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 +--- drivers/block/drbd/drbd_worker.c | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 200791e44aa..34062e37df6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1308,15 +1308,13 @@ int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id) { - int ok; struct p_block_req p; p.sector = cpu_to_be64(sector); p.block_id = block_id; p.blksize = cpu_to_be32(size); - ok = !drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &p.head, sizeof(p)); - return ok; + return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &p.head, sizeof(p)); } int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 216479641a7..3c785657040 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -637,7 +637,7 @@ next_sector: } } else { inc_rs_pending(mdev); - if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST, + if (drbd_send_drequest(mdev, P_RS_DATA_REQUEST, sector, size, ID_SYNCER)) { dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); dec_rs_pending(mdev); @@ -1287,8 +1287,8 @@ int w_send_read_req(struct drbd_work *w, int cancel) return 1; } - ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, - (unsigned long)req); + ok = !drbd_send_drequest(mdev, P_DATA_REQUEST, 
req->i.sector, req->i.size, + (unsigned long)req); req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); From db1b0b724e56f34608b76197191ef0577a1ddd45 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:37:21 +0100 Subject: [PATCH 228/609] drbd: drbd_send_drequest_csum(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 12 +++++------- drivers/block/drbd/drbd_worker.c | 12 ++++++------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 34062e37df6..779bf5b10d5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1320,7 +1320,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, void *digest, int digest_size, enum drbd_packet cmd) { - int ok; + int err; struct p_block_req p; prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size); @@ -1329,13 +1329,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, p.blksize = cpu_to_be32(size); mutex_lock(&mdev->tconn->data.mutex); - - ok = (sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), 0)); - ok = ok && (digest_size == drbd_send(mdev->tconn, mdev->tconn->data.socket, digest, digest_size, 0)); - + err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), 0); + if (!err) + err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, digest, digest_size, 0); mutex_unlock(&mdev->tconn->data.mutex); - - return ok; + return err; } int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3c785657040..a529ffe213a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -324,9 
+324,9 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) drbd_free_ee(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, sector, size, - digest, digest_size, - P_CSUM_RS_REQUEST); + ok = !drbd_send_drequest_csum(mdev, sector, size, + digest, digest_size, + P_CSUM_RS_REQUEST); kfree(digest); } else { dev_err(DEV, "kmalloc() of digest failed.\n"); @@ -1089,9 +1089,9 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) drbd_free_ee(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, sector, size, - digest, digest_size, - P_OV_REPLY); + ok = !drbd_send_drequest_csum(mdev, sector, size, + digest, digest_size, + P_OV_REPLY); if (!ok) dec_rs_pending(mdev); kfree(digest); From 73218a3c4c7ae87014b8fc258f8a16a75aad2870 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 01:39:44 +0100 Subject: [PATCH 229/609] drbd: drbd_send_oos(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 779bf5b10d5..93805354eec 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1640,7 +1640,7 @@ int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) p.sector = cpu_to_be64(req->i.sector); p.blksize = cpu_to_be32(req->i.size); - return !drbd_send_cmd(mdev, &mdev->tconn->data, P_OUT_OF_SYNC, &p.head, sizeof(p)); + return drbd_send_cmd(mdev, &mdev->tconn->data, P_OUT_OF_SYNC, &p.head, sizeof(p)); } /* diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a529ffe213a..3a2c51fd8ea 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1241,7 +1241,7 @@ int w_send_oos(struct drbd_work *w, int cancel) return 
1; } - ok = drbd_send_oos(mdev, req); + ok = !drbd_send_oos(mdev, req); req_mod(req, OOS_HANDED_TO_NETWORK); return ok; From b987427b53f4bcddf0a951da573c83ce7eddf70d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 09:41:10 +0100 Subject: [PATCH 230/609] drbd: _drbd_no_send_page(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 93805354eec..ff75cce21f4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1403,13 +1403,19 @@ static void drbd_update_congested(struct drbd_tconn *tconn) * with page_count == 0 or PageSlab. */ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size, unsigned msg_flags) + int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev->tconn, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags); + struct socket *socket; + void *addr; + int err; + + socket = mdev->tconn->data.socket; + addr = kmap(page) + offset; + err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags); kunmap(page); - if (sent == size) - mdev->send_cnt += size>>9; - return sent == size; + if (!err) + mdev->send_cnt += size >> 9; + return err; } static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, @@ -1426,7 +1432,7 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. 
*/ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) - return _drbd_no_send_page(mdev, page, offset, size, msg_flags); + return !_drbd_no_send_page(mdev, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; drbd_update_congested(mdev->tconn); @@ -1465,9 +1471,9 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) int i; /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { - if (!_drbd_no_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, - i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) + if (_drbd_no_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) return 0; } return 1; From 88b390ff639ce5d9a1e741bb2f975e932d5cb605 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 10:44:16 +0100 Subject: [PATCH 231/609] drbd: _drbd_send_page(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 44 ++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ff75cce21f4..fa838b24eed 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1421,9 +1421,10 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size, unsigned msg_flags) { + struct socket *socket = mdev->tconn->data.socket; mm_segment_t oldfs = get_fs(); - int sent, ok; int len = size; + int err = -EIO; /* e.g. XFS meta- & log-data is in slab pages, which have a * page_count of 0 and/or have PageSlab() set. @@ -1432,25 +1433,25 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. 
*/ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) - return !_drbd_no_send_page(mdev, page, offset, size, msg_flags); + return _drbd_no_send_page(mdev, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; drbd_update_congested(mdev->tconn); set_fs(KERNEL_DS); do { - sent = mdev->tconn->data.socket->ops->sendpage(mdev->tconn->data.socket, page, - offset, len, - msg_flags); - if (sent == -EAGAIN) { - if (we_should_drop_the_connection(mdev->tconn, - mdev->tconn->data.socket)) - break; - else - continue; - } + int sent; + + sent = socket->ops->sendpage(socket, page, offset, len, msg_flags); if (sent <= 0) { + if (sent == -EAGAIN) { + if (we_should_drop_the_connection(mdev->tconn, socket)) + break; + continue; + } dev_warn(DEV, "%s: size=%d len=%d sent=%d\n", __func__, (int)size, len, sent); + if (sent < 0) + err = sent; break; } len -= sent; @@ -1459,10 +1460,11 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, set_fs(oldfs); clear_bit(NET_CONGESTED, &mdev->tconn->flags); - ok = (len == 0); - if (likely(ok)) - mdev->send_cnt += size>>9; - return ok; + if (len == 0) { + err = 0; + mdev->send_cnt += size >> 9; + } + return err; } static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) @@ -1485,9 +1487,9 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) int i; /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { - if (!_drbd_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, - i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) + if (_drbd_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) return 0; } return 1; @@ -1502,8 +1504,8 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - if (!_drbd_send_page(mdev, page, 0, l, - page_chain_next(page) ? 
MSG_MORE : 0)) + if (_drbd_send_page(mdev, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0)) return 0; len -= l; } From 9f69230cd6e8dc87bf6b55e46c51418a2b6ac322 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 10:49:09 +0100 Subject: [PATCH 232/609] drbd: _drbd_send_zc_ee(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index fa838b24eed..59fb39f15a1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1500,16 +1500,19 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, { struct page *page = peer_req->pages; unsigned len = peer_req->i.size; + int err; /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - if (_drbd_send_page(mdev, page, 0, l, - page_chain_next(page) ? MSG_MORE : 0)) - return 0; + + err = _drbd_send_page(mdev, page, 0, l, + page_chain_next(page) ? 
MSG_MORE : 0); + if (err) + return err; len -= l; } - return 1; + return 0; } static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) @@ -1634,7 +1637,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) - ok = _drbd_send_zc_ee(mdev, peer_req); + ok = !_drbd_send_zc_ee(mdev, peer_req); drbd_put_data_sock(mdev->tconn); From 7b57b89d624cfdefc91d0a8b015c494c25a49292 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 11:35:20 +0100 Subject: [PATCH 233/609] drbd: drbd_send_block(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 23 ++++++++++++----------- drivers/block/drbd/drbd_worker.c | 6 +++--- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 59fb39f15a1..8fee2fe4c85 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1608,7 +1608,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_peer_request *peer_req) { - int ok; + int err; struct p_data p; void *dgb; int dgs; @@ -1627,21 +1627,22 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL * in response to admin command or module unload. */ - if (drbd_get_data_sock(mdev->tconn)) - return 0; - - ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); - if (ok && dgs) { + err = drbd_get_data_sock(mdev->tconn); + if (err) + return err; + err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, &p, + sizeof(p), dgs ? 
MSG_MORE : 0); + if (!err && dgs) { dgb = mdev->tconn->int_dig_out; drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb); - ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); + err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, dgb, + dgs, 0); } - if (ok) - ok = !_drbd_send_zc_ee(mdev, peer_req); - + if (!err) + err = _drbd_send_zc_ee(mdev, peer_req); drbd_put_data_sock(mdev->tconn); - return ok; + return err; } int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3a2c51fd8ea..39b374185ca 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -920,7 +920,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) } if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - ok = drbd_send_block(mdev, P_DATA_REPLY, peer_req); + ok = !drbd_send_block(mdev, P_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. sector=%llus.\n", @@ -966,7 +966,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(mdev); - ok = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); + ok = !drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Not sending RSDataReply, " @@ -1040,7 +1040,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! 
*/ peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); - ok = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); + ok = !drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } } else { ok = !drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); From 7fae55da38e810c75b97cc56380aa6f735504216 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 11:46:33 +0100 Subject: [PATCH 234/609] drbd: _drbd_send_bio(), _drbd_send_zc_bio(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8fee2fe4c85..77c957ff7a0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1473,12 +1473,15 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) int i; /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { - if (_drbd_no_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, - i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) - return 0; + int err; + + err = _drbd_no_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt - 1 ? 0 : MSG_MORE); + if (err) + return err; } - return 1; + return 0; } static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) @@ -1487,12 +1490,15 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) int i; /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { - if (_drbd_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, - i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) - return 0; + int err; + + err = _drbd_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt - 1 ? 
0 : MSG_MORE); + if (err) + return err; } - return 1; + return 0; } static int _drbd_send_zc_ee(struct drbd_conf *mdev, @@ -1576,9 +1582,9 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * receiving side, we sure have detected corruption elsewhere. */ if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs) - ok = _drbd_send_bio(mdev, req->master_bio); + ok = !_drbd_send_bio(mdev, req->master_bio); else - ok = _drbd_send_zc_bio(mdev, req->master_bio); + ok = !_drbd_send_zc_bio(mdev, req->master_bio); /* double check digest, sometimes buffers have been modified in flight. */ if (dgs > 0 && dgs <= 64) { From 6bdb9b0e230aae94b084d8a375363ada056653b5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 11:52:58 +0100 Subject: [PATCH 235/609] drbd: drbd_send_dblock(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 23 ++++++++++++----------- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 77c957ff7a0..2f203bab593 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1537,14 +1537,15 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) */ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) { - int ok = 1; + int err; struct p_data p; unsigned int dp_flags = 0; void *dgb; int dgs; - if (drbd_get_data_sock(mdev->tconn)) - return 0; + err = drbd_get_data_sock(mdev->tconn); + if (err) + return err; dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? 
crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; @@ -1562,14 +1563,14 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); - ok = (sizeof(p) == - drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); - if (ok && dgs) { + err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, &p, + sizeof(p), dgs ? MSG_MORE : 0); + if (!err && dgs) { dgb = mdev->tconn->int_dig_out; drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb); - ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); + err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); } - if (ok) { + if (!err) { /* For protocol A, we have to memcpy the payload into * socket buffers, as we may complete right away * as soon as we handed it over to tcp, at which point the data @@ -1582,9 +1583,9 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * receiving side, we sure have detected corruption elsewhere. */ if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs) - ok = !_drbd_send_bio(mdev, req->master_bio); + err = _drbd_send_bio(mdev, req->master_bio); else - ok = !_drbd_send_zc_bio(mdev, req->master_bio); + err = _drbd_send_zc_bio(mdev, req->master_bio); /* double check digest, sometimes buffers have been modified in flight. 
*/ if (dgs > 0 && dgs <= 64) { @@ -1604,7 +1605,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) drbd_put_data_sock(mdev->tconn); - return ok; + return err; } /* answer packet, used to send data back for read requests: diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 39b374185ca..b42ba1af806 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1264,7 +1264,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) return 1; } - ok = drbd_send_dblock(mdev, req); + ok = !drbd_send_dblock(mdev, req); req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); return ok; From a896527c0658f9073413d46c2401448cdc0427ff Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 16:08:22 +0100 Subject: [PATCH 236/609] drbd: drbd_send_short_cmd(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index bcb3bc25a72..173fed633e2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1937,7 +1937,7 @@ static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_header h; - return !drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, sizeof(h)); + return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_tconn *tconn) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b42ba1af806..0dd2f2d81de 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1227,7 +1227,7 @@ int w_send_write_hint(struct drbd_work *w, int cancel) struct drbd_conf *mdev = w->mdev; if (cancel) return 1; - return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); + 
return !drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } int w_send_oos(struct drbd_work *w, int cancel) From b2f0ab62ecfe8711fefb82223b40430f8141a949 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 14:53:19 +0100 Subject: [PATCH 237/609] drbd: Temporarily change the return type of all worker callbacks This helps to ensure that we don't miss one of them when changing their return value semantics. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 6 ++--- drivers/block/drbd/drbd_int.h | 37 +++++++++++++-------------- drivers/block/drbd/drbd_main.c | 13 +++++----- drivers/block/drbd/drbd_receiver.c | 12 ++++----- drivers/block/drbd/drbd_state.c | 6 ++--- drivers/block/drbd/drbd_worker.c | 40 ++++++++++++++---------------- 6 files changed, 56 insertions(+), 58 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 89f1a6904a4..ac96cf9c896 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -106,7 +106,7 @@ struct drbd_atodb_wait { }; -int w_al_write_transaction(struct drbd_work *, int); +static long w_al_write_transaction(struct drbd_work *, int); static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, @@ -298,7 +298,7 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); } -int +static long w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); @@ -698,7 +698,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) wake_up(&mdev->al_wait); } -static int w_update_odbm(struct drbd_work *w, int unused) +static long w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); struct drbd_conf *mdev = w->mdev; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 173fed633e2..b42387dadd5 
100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -644,7 +644,7 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) } struct drbd_work; -typedef int (*drbd_work_cb)(struct drbd_work *, int cancel); +typedef long (*drbd_work_cb)(struct drbd_work *, int cancel); struct drbd_work { struct list_head list; drbd_work_cb cb; @@ -1546,23 +1546,24 @@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio * extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ -extern int w_read_retry_remote(struct drbd_work *, int); -extern int w_e_end_data_req(struct drbd_work *, int); -extern int w_e_end_rsdata_req(struct drbd_work *, int); -extern int w_e_end_csum_rs_req(struct drbd_work *, int); -extern int w_e_end_ov_reply(struct drbd_work *, int); -extern int w_e_end_ov_req(struct drbd_work *, int); -extern int w_ov_finished(struct drbd_work *, int); -extern int w_resync_timer(struct drbd_work *, int); -extern int w_send_write_hint(struct drbd_work *, int); -extern int w_send_dblock(struct drbd_work *, int); -extern int w_send_barrier(struct drbd_work *, int); -extern int w_send_read_req(struct drbd_work *, int); -extern int w_prev_work_done(struct drbd_work *, int); -extern int w_e_reissue(struct drbd_work *, int); -extern int w_restart_disk_io(struct drbd_work *, int); -extern int w_send_oos(struct drbd_work *, int); -extern int w_start_resync(struct drbd_work *, int); +extern long w_read_retry_remote(struct drbd_work *, int); +extern long w_e_end_data_req(struct drbd_work *, int); +extern long w_e_end_rsdata_req(struct drbd_work *, int); +extern long w_e_end_csum_rs_req(struct drbd_work *, int); +extern long w_e_end_ov_reply(struct drbd_work *, int); +extern long w_e_end_ov_req(struct drbd_work *, int); +extern long w_ov_finished(struct drbd_work *, int); +extern long w_resync_timer(struct drbd_work *, int); +extern long 
w_send_write_hint(struct drbd_work *, int); +extern long w_make_resync_request(struct drbd_work *, int); +extern long w_send_dblock(struct drbd_work *, int); +extern long w_send_barrier(struct drbd_work *, int); +extern long w_send_read_req(struct drbd_work *, int); +extern long w_prev_work_done(struct drbd_work *, int); +extern long w_e_reissue(struct drbd_work *, int); +extern long w_restart_disk_io(struct drbd_work *, int); +extern long w_send_oos(struct drbd_work *, int); +extern long w_start_resync(struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); extern void start_resync_timer_fn(unsigned long data); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2f203bab593..dd4401b6842 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -64,10 +64,10 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -static int w_md_sync(struct drbd_work *w, int unused); +static long w_md_sync(struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); -static int w_bitmap_io(struct drbd_work *w, int unused); -static int w_go_diskless(struct drbd_work *w, int unused); +static long w_bitmap_io(struct drbd_work *w, int unused); +static long w_go_diskless(struct drbd_work *w, int unused); MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); @@ -1828,7 +1828,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); atomic_set(&mdev->unacked_cnt, 0); atomic_set(&mdev->local_cnt, 0); - atomic_set(&mdev->pp_in_use, 0); atomic_set(&mdev->pp_in_use_by_net, 0); atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); @@ -2885,7 +2884,7 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) return rv; } -static int w_bitmap_io(struct drbd_work *w, int unused) +static long w_bitmap_io(struct drbd_work *w, 
int unused) { struct bm_io_work *work = container_of(w, struct bm_io_work, w); struct drbd_conf *mdev = w->mdev; @@ -2926,7 +2925,7 @@ void drbd_ldev_destroy(struct drbd_conf *mdev) clear_bit(GO_DISKLESS, &mdev->flags); } -static int w_go_diskless(struct drbd_work *w, int unused) +static long w_go_diskless(struct drbd_work *w, int unused) { struct drbd_conf *mdev = w->mdev; @@ -3042,7 +3041,7 @@ static void md_sync_timer_fn(unsigned long data) drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); } -static int w_md_sync(struct drbd_work *w, int unused) +static long w_md_sync(struct drbd_work *w, int unused) { struct drbd_conf *mdev = w->mdev; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9cd3d0d3ae4..4d0463c70ce 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -70,7 +70,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(int vnr, void *p, void *data); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); -static int e_end_block(struct drbd_work *, int); +static long e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) @@ -1461,7 +1461,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, /* e_end_resync_block() is called via * drbd_process_done_ee() by asender only */ -static int e_end_resync_block(struct drbd_work *w, int unused) +static long e_end_resync_block(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); @@ -1597,7 +1597,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, return ok; } -static int w_restart_write(struct drbd_work *w, int cancel) +static long w_restart_write(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1645,7 
+1645,7 @@ static void restart_conflicting_writes(struct drbd_conf *mdev, /* e_end_block() is called via drbd_process_done_ee(). * this means this function only runs in the asender thread */ -static int e_end_block(struct drbd_work *w, int cancel) +static long e_end_block(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); @@ -1699,12 +1699,12 @@ static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) return ok; } -static int e_send_discard_write(struct drbd_work *w, int unused) +static long e_send_discard_write(struct drbd_work *w, int unused) { return e_send_ack(w, P_DISCARD_WRITE); } -static int e_send_retry_write(struct drbd_work *w, int unused) +static long e_send_retry_write(struct drbd_work *w, int unused) { struct drbd_tconn *tconn = w->mdev->tconn; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 1c681110580..c6d97200970 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,7 +37,7 @@ struct after_state_chg_work { struct completion *done; }; -static int w_after_state_ch(struct drbd_work *w, int unused); +static long w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_all_state_ch(struct drbd_tconn *tconn); @@ -918,7 +918,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } -static int w_after_state_ch(struct drbd_work *w, int unused) +static long w_after_state_ch(struct drbd_work *w, int unused) { struct after_state_chg_work *ascw = container_of(w, struct after_state_chg_work, w); @@ -1289,7 +1289,7 @@ static void after_all_state_ch(struct drbd_tconn *tconn) } } -static int w_after_conn_state_ch(struct drbd_work *w, int unused) +static long w_after_conn_state_ch(struct drbd_work *w, int unused) { struct after_conn_state_chg_work *acscw = 
container_of(w, struct after_conn_state_chg_work, w); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0dd2f2d81de..d38bf70a16d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -39,8 +39,6 @@ #include "drbd_req.h" static int w_make_ov_request(struct drbd_work *w, int cancel); -static int w_make_resync_request(struct drbd_work *w, int cancel); - /* endio handlers: @@ -227,7 +225,7 @@ void drbd_request_endio(struct bio *bio, int error) complete_master_bio(mdev, &m); } -int w_read_retry_remote(struct drbd_work *w, int cancel) +long w_read_retry_remote(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -296,7 +294,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * } /* MAYBE merge common code with w_e_end_ov_req */ -static int w_e_send_csum(struct drbd_work *w, int cancel) +static long w_e_send_csum(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; @@ -383,7 +381,7 @@ defer: return -EAGAIN; } -int w_resync_timer(struct drbd_work *w, int cancel) +long w_resync_timer(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; switch (mdev->state.conn) { @@ -505,7 +503,7 @@ static int drbd_rs_number_requests(struct drbd_conf *mdev) return number; } -static int w_make_resync_request(struct drbd_work *w, int cancel) +long w_make_resync_request(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; unsigned long bit; @@ -709,7 +707,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) return 1; } -int w_ov_finished(struct drbd_work *w, int cancel) +long w_ov_finished(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; kfree(w); @@ -719,7 +717,7 @@ int w_ov_finished(struct drbd_work *w, int cancel) return 1; } -static int 
w_resync_finished(struct drbd_work *w, int cancel) +static long w_resync_finished(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; kfree(w); @@ -907,7 +905,7 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_requ * @w: work object. * @cancel: The connection will be closed anyways */ -int w_e_end_data_req(struct drbd_work *w, int cancel) +long w_e_end_data_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; @@ -944,7 +942,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) * @w: work object. * @cancel: The connection will be closed anyways */ -int w_e_end_rsdata_req(struct drbd_work *w, int cancel) +long w_e_end_rsdata_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; @@ -993,7 +991,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) return ok; } -int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) +long w_e_end_csum_rs_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; @@ -1056,7 +1054,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) return ok; } -int w_e_end_ov_req(struct drbd_work *w, int cancel) +long w_e_end_ov_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; @@ -1114,7 +1112,7 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) drbd_set_out_of_sync(mdev, sector, size); } -int w_e_end_ov_reply(struct drbd_work *w, int cancel) +long w_e_end_ov_reply(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; @@ -1182,7 +1180,7 @@ int 
w_e_end_ov_reply(struct drbd_work *w, int cancel) return ok; } -int w_prev_work_done(struct drbd_work *w, int cancel) +long w_prev_work_done(struct drbd_work *w, int cancel) { struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); @@ -1190,7 +1188,7 @@ int w_prev_work_done(struct drbd_work *w, int cancel) return 1; } -int w_send_barrier(struct drbd_work *w, int cancel) +long w_send_barrier(struct drbd_work *w, int cancel) { struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); struct drbd_conf *mdev = w->mdev; @@ -1222,7 +1220,7 @@ int w_send_barrier(struct drbd_work *w, int cancel) return ok; } -int w_send_write_hint(struct drbd_work *w, int cancel) +long w_send_write_hint(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; if (cancel) @@ -1230,7 +1228,7 @@ int w_send_write_hint(struct drbd_work *w, int cancel) return !drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } -int w_send_oos(struct drbd_work *w, int cancel) +long w_send_oos(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1253,7 +1251,7 @@ int w_send_oos(struct drbd_work *w, int cancel) * @w: work object. * @cancel: The connection will be closed anyways */ -int w_send_dblock(struct drbd_work *w, int cancel) +long w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1276,7 +1274,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) * @w: work object. 
* @cancel: The connection will be closed anyways */ -int w_send_read_req(struct drbd_work *w, int cancel) +long w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1295,7 +1293,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) return ok; } -int w_restart_disk_io(struct drbd_work *w, int cancel) +long w_restart_disk_io(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1452,7 +1450,7 @@ void start_resync_timer_fn(unsigned long data) drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); } -int w_start_resync(struct drbd_work *w, int cancel) +long w_start_resync(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; From 99920dc5c5fe52182fe922aa70330861e2b6418b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 15:31:39 +0100 Subject: [PATCH 238/609] drbd: Make all worker callbacks return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 16 +-- drivers/block/drbd/drbd_int.h | 38 +++--- drivers/block/drbd/drbd_main.c | 18 +-- drivers/block/drbd/drbd_receiver.c | 40 +++--- drivers/block/drbd/drbd_state.c | 10 +- drivers/block/drbd/drbd_worker.c | 196 +++++++++++++++-------------- 6 files changed, 160 insertions(+), 158 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ac96cf9c896..fe60b18c1da 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -106,7 +106,7 @@ struct drbd_atodb_wait { }; -static long w_al_write_transaction(struct drbd_work *, int); +static int w_al_write_transaction(struct drbd_work *, int); static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, @@ -298,7 +298,7 @@ static unsigned int 
rs_extent_to_bm_page(unsigned int rs_enr) (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); } -static long +static int w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); @@ -315,7 +315,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) drbd_disk_str(mdev->state.disk)); aw->err = -EIO; complete(&((struct update_al_work *)w)->event); - return 1; + return 0; } /* The bitmap write may have failed, causing a state change. */ @@ -326,7 +326,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) aw->err = -EIO; complete(&((struct update_al_work *)w)->event); put_ldev(mdev); - return 1; + return 0; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ @@ -404,7 +404,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) complete(&((struct update_al_work *)w)->event); put_ldev(mdev); - return 1; + return 0; } /* FIXME @@ -698,7 +698,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) wake_up(&mdev->al_wait); } -static long w_update_odbm(struct drbd_work *w, int unused) +static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); struct drbd_conf *mdev = w->mdev; @@ -708,7 +708,7 @@ static long w_update_odbm(struct drbd_work *w, int unused) if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); kfree(udw); - return 1; + return 0; } drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr)); @@ -728,7 +728,7 @@ static long w_update_odbm(struct drbd_work *w, int unused) } drbd_bcast_event(mdev, &sib); - return 1; + return 0; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b42387dadd5..300e85ef94d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -644,7 +644,7 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) } struct drbd_work; -typedef long 
(*drbd_work_cb)(struct drbd_work *, int cancel); +typedef int (*drbd_work_cb)(struct drbd_work *, int cancel); struct drbd_work { struct list_head list; drbd_work_cb cb; @@ -1546,24 +1546,24 @@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio * extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ -extern long w_read_retry_remote(struct drbd_work *, int); -extern long w_e_end_data_req(struct drbd_work *, int); -extern long w_e_end_rsdata_req(struct drbd_work *, int); -extern long w_e_end_csum_rs_req(struct drbd_work *, int); -extern long w_e_end_ov_reply(struct drbd_work *, int); -extern long w_e_end_ov_req(struct drbd_work *, int); -extern long w_ov_finished(struct drbd_work *, int); -extern long w_resync_timer(struct drbd_work *, int); -extern long w_send_write_hint(struct drbd_work *, int); -extern long w_make_resync_request(struct drbd_work *, int); -extern long w_send_dblock(struct drbd_work *, int); -extern long w_send_barrier(struct drbd_work *, int); -extern long w_send_read_req(struct drbd_work *, int); -extern long w_prev_work_done(struct drbd_work *, int); -extern long w_e_reissue(struct drbd_work *, int); -extern long w_restart_disk_io(struct drbd_work *, int); -extern long w_send_oos(struct drbd_work *, int); -extern long w_start_resync(struct drbd_work *, int); +extern int w_read_retry_remote(struct drbd_work *, int); +extern int w_e_end_data_req(struct drbd_work *, int); +extern int w_e_end_rsdata_req(struct drbd_work *, int); +extern int w_e_end_csum_rs_req(struct drbd_work *, int); +extern int w_e_end_ov_reply(struct drbd_work *, int); +extern int w_e_end_ov_req(struct drbd_work *, int); +extern int w_ov_finished(struct drbd_work *, int); +extern int w_resync_timer(struct drbd_work *, int); +extern int w_send_write_hint(struct drbd_work *, int); +extern int w_make_resync_request(struct drbd_work *, int); +extern int w_send_dblock(struct drbd_work 
*, int); +extern int w_send_barrier(struct drbd_work *, int); +extern int w_send_read_req(struct drbd_work *, int); +extern int w_prev_work_done(struct drbd_work *, int); +extern int w_e_reissue(struct drbd_work *, int); +extern int w_restart_disk_io(struct drbd_work *, int); +extern int w_send_oos(struct drbd_work *, int); +extern int w_start_resync(struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); extern void start_resync_timer_fn(unsigned long data); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dd4401b6842..2e7fd4d326d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -64,10 +64,10 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -static long w_md_sync(struct drbd_work *w, int unused); +static int w_md_sync(struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); -static long w_bitmap_io(struct drbd_work *w, int unused); -static long w_go_diskless(struct drbd_work *w, int unused); +static int w_bitmap_io(struct drbd_work *w, int unused); +static int w_go_diskless(struct drbd_work *w, int unused); MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); @@ -2884,7 +2884,7 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) return rv; } -static long w_bitmap_io(struct drbd_work *w, int unused) +static int w_bitmap_io(struct drbd_work *w, int unused) { struct bm_io_work *work = container_of(w, struct bm_io_work, w); struct drbd_conf *mdev = w->mdev; @@ -2909,7 +2909,7 @@ static long w_bitmap_io(struct drbd_work *w, int unused) work->why = NULL; work->flags = 0; - return 1; + return 0; } void drbd_ldev_destroy(struct drbd_conf *mdev) @@ -2925,7 +2925,7 @@ void drbd_ldev_destroy(struct drbd_conf *mdev) clear_bit(GO_DISKLESS, &mdev->flags); } -static long w_go_diskless(struct drbd_work *w, int unused) +static 
int w_go_diskless(struct drbd_work *w, int unused) { struct drbd_conf *mdev = w->mdev; @@ -2935,7 +2935,7 @@ static long w_go_diskless(struct drbd_work *w, int unused) * the protected members anymore, though, so once put_ldev reaches zero * again, it will be safe to free them. */ drbd_force_state(mdev, NS(disk, D_DISKLESS)); - return 1; + return 0; } void drbd_go_diskless(struct drbd_conf *mdev) @@ -3041,7 +3041,7 @@ static void md_sync_timer_fn(unsigned long data) drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); } -static long w_md_sync(struct drbd_work *w, int unused) +static int w_md_sync(struct drbd_work *w, int unused) { struct drbd_conf *mdev = w->mdev; @@ -3051,7 +3051,7 @@ static long w_md_sync(struct drbd_work *w, int unused) mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line); #endif drbd_md_sync(mdev); - return 1; + return 0; } const char *cmdname(enum drbd_packet cmd) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4d0463c70ce..d3e3188575e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -70,7 +70,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(int vnr, void *p, void *data); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); -static long e_end_block(struct drbd_work *, int); +static int e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) @@ -425,7 +425,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) */ list_for_each_entry_safe(peer_req, t, &work_list, w.list) { /* list_del not necessary, next/prev members not touched */ - ok = peer_req->w.cb(&peer_req->w, !ok) && ok; + ok = !peer_req->w.cb(&peer_req->w, !ok) && ok; drbd_free_ee(mdev, peer_req); } wake_up(&mdev->ee_wait); @@ -1461,28 +1461,28 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, /* e_end_resync_block() is called 
via * drbd_process_done_ee() by asender only */ -static long e_end_resync_block(struct drbd_work *w, int unused) +static int e_end_resync_block(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; - int ok; + int err; D_ASSERT(drbd_interval_empty(&peer_req->i)); if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, peer_req->i.size); - ok = !drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); + err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ drbd_rs_failed_io(mdev, sector, peer_req->i.size); - ok = !drbd_send_ack(mdev, P_NEG_ACK, peer_req); + err = drbd_send_ack(mdev, P_NEG_ACK, peer_req); } dec_unacked(mdev); - return ok; + return err; } static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) @@ -1597,7 +1597,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, return ok; } -static long w_restart_write(struct drbd_work *w, int cancel) +static int w_restart_write(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1608,7 +1608,7 @@ static long w_restart_write(struct drbd_work *w, int cancel) spin_lock_irqsave(&mdev->tconn->req_lock, flags); if (!expect(req->rq_state & RQ_POSTPONED)) { spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - return 0; + return -EIO; } bio = req->master_bio; start_time = req->start_time; @@ -1618,7 +1618,7 @@ static long w_restart_write(struct drbd_work *w, int cancel) while (__drbd_make_request(mdev, bio, start_time)) /* retry */ ; - return 1; + return 0; } static void restart_conflicting_writes(struct drbd_conf *mdev, @@ -1645,13 +1645,13 @@ static void restart_conflicting_writes(struct drbd_conf *mdev, /* e_end_block() is called via drbd_process_done_ee(). 
* this means this function only runs in the asender thread */ -static long e_end_block(struct drbd_work *w, int cancel) +static int e_end_block(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; - int ok = 1, pcmd; + int err = 0, pcmd; if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) { if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { @@ -1659,11 +1659,11 @@ static long e_end_block(struct drbd_work *w, int cancel) mdev->state.conn <= C_PAUSED_SYNC_T && peer_req->flags & EE_MAY_SET_IN_SYNC) ? P_RS_WRITE_ACK : P_WRITE_ACK; - ok &= !drbd_send_ack(mdev, pcmd, peer_req); + err = drbd_send_ack(mdev, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) drbd_set_in_sync(mdev, sector, peer_req->i.size); } else { - ok = !drbd_send_ack(mdev, P_NEG_ACK, peer_req); + err = drbd_send_ack(mdev, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... * maybe assert this? */ } @@ -1683,7 +1683,7 @@ static long e_end_block(struct drbd_work *w, int cancel) drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? 
EV_CLEANUP : 0)); - return ok; + return err; } static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) @@ -1691,20 +1691,20 @@ static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) struct drbd_conf *mdev = w->mdev; struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - int ok; + int err; - ok = !drbd_send_ack(mdev, ack, peer_req); + err = drbd_send_ack(mdev, ack, peer_req); dec_unacked(mdev); - return ok; + return err; } -static long e_send_discard_write(struct drbd_work *w, int unused) +static int e_send_discard_write(struct drbd_work *w, int unused) { return e_send_ack(w, P_DISCARD_WRITE); } -static long e_send_retry_write(struct drbd_work *w, int unused) +static int e_send_retry_write(struct drbd_work *w, int unused) { struct drbd_tconn *tconn = w->mdev->tconn; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c6d97200970..79e4a90a6c7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,7 +37,7 @@ struct after_state_chg_work { struct completion *done; }; -static long w_after_state_ch(struct drbd_work *w, int unused); +static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_all_state_ch(struct drbd_tconn *tconn); @@ -918,7 +918,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } -static long w_after_state_ch(struct drbd_work *w, int unused) +static int w_after_state_ch(struct drbd_work *w, int unused) { struct after_state_chg_work *ascw = container_of(w, struct after_state_chg_work, w); @@ -931,7 +931,7 @@ static long w_after_state_ch(struct drbd_work *w, int unused) } kfree(ascw); - return 1; + return 0; } static void abw_start_sync(struct drbd_conf *mdev, int rv) @@ -1289,7 +1289,7 @@ static void after_all_state_ch(struct drbd_tconn *tconn) } } -static long 
w_after_conn_state_ch(struct drbd_work *w, int unused) +static int w_after_conn_state_ch(struct drbd_work *w, int unused) { struct after_conn_state_chg_work *acscw = container_of(w, struct after_conn_state_chg_work, w); @@ -1306,7 +1306,7 @@ static long w_after_conn_state_ch(struct drbd_work *w, int unused) //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); after_all_state_ch(tconn); - return 1; + return 0; } static void print_conn_state_change(struct drbd_tconn *tconn, enum drbd_conns oc, enum drbd_conns nc) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d38bf70a16d..0f0beec9a19 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -225,7 +225,7 @@ void drbd_request_endio(struct bio *bio, int error) complete_master_bio(mdev, &m); } -long w_read_retry_remote(struct drbd_work *w, int cancel) +int w_read_retry_remote(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -238,7 +238,7 @@ long w_read_retry_remote(struct drbd_work *w, int cancel) if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { _req_mod(req, READ_RETRY_REMOTE_CANCELED); spin_unlock_irq(&mdev->tconn->req_lock); - return 1; + return 0; } spin_unlock_irq(&mdev->tconn->req_lock); @@ -294,13 +294,13 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * } /* MAYBE merge common code with w_e_end_ov_req */ -static long w_e_send_csum(struct drbd_work *w, int cancel) +static int w_e_send_csum(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; int digest_size; void *digest; - int ok = 1; + int err = 0; if (unlikely(cancel)) goto out; @@ -322,22 +322,22 @@ static long w_e_send_csum(struct drbd_work *w, int cancel) drbd_free_ee(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); - ok = !drbd_send_drequest_csum(mdev, 
sector, size, + err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_CSUM_RS_REQUEST); kfree(digest); } else { dev_err(DEV, "kmalloc() of digest failed.\n"); - ok = 0; + err = -ENOMEM; } out: if (peer_req) drbd_free_ee(mdev, peer_req); - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); - return ok; + return err; } #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) @@ -381,7 +381,7 @@ defer: return -EAGAIN; } -long w_resync_timer(struct drbd_work *w, int cancel) +int w_resync_timer(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; switch (mdev->state.conn) { @@ -393,7 +393,7 @@ long w_resync_timer(struct drbd_work *w, int cancel) break; } - return 1; + return 0; } void resync_timer_fn(unsigned long data) @@ -503,7 +503,7 @@ static int drbd_rs_number_requests(struct drbd_conf *mdev) return number; } -long w_make_resync_request(struct drbd_work *w, int cancel) +int w_make_resync_request(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; unsigned long bit; @@ -515,12 +515,12 @@ long w_make_resync_request(struct drbd_work *w, int cancel) int i = 0; if (unlikely(cancel)) - return 1; + return 0; if (mdev->rs_total == 0) { /* empty resync? 
*/ drbd_resync_finished(mdev); - return 1; + return 0; } if (!get_ldev(mdev)) { @@ -529,7 +529,7 @@ long w_make_resync_request(struct drbd_work *w, int cancel) to continue resync with a broken disk makes no sense at all */ dev_err(DEV, "Disk broke down during resync!\n"); - return 1; + return 0; } max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; @@ -558,7 +558,7 @@ next_sector: if (bit == DRBD_END_OF_BITMAP) { mdev->bm_resync_fo = drbd_bm_bits(mdev); put_ldev(mdev); - return 1; + return 0; } sector = BM_BIT_TO_SECT(bit); @@ -621,7 +621,7 @@ next_sector: switch (read_for_csum(mdev, sector, size)) { case -EIO: /* Disk failure */ put_ldev(mdev); - return 0; + return -EIO; case -EAGAIN: /* allocation failed, or ldev busy */ drbd_rs_complete_io(mdev, sector); mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); @@ -634,13 +634,16 @@ next_sector: BUG(); } } else { + int err; + inc_rs_pending(mdev); - if (drbd_send_drequest(mdev, P_RS_DATA_REQUEST, - sector, size, ID_SYNCER)) { + err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST, + sector, size, ID_SYNCER); + if (err) { dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); dec_rs_pending(mdev); put_ldev(mdev); - return 0; + return err; } } } @@ -653,14 +656,14 @@ next_sector: * until then resync "work" is "inactive" ... 
*/ put_ldev(mdev); - return 1; + return 0; } requeue: mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); put_ldev(mdev); - return 1; + return 0; } static int w_make_ov_request(struct drbd_work *w, int cancel) @@ -707,24 +710,24 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) return 1; } -long w_ov_finished(struct drbd_work *w, int cancel) +int w_ov_finished(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; kfree(w); ov_oos_print(mdev); drbd_resync_finished(mdev); - return 1; + return 0; } -static long w_resync_finished(struct drbd_work *w, int cancel) +static int w_resync_finished(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; kfree(w); drbd_resync_finished(mdev); - return 1; + return 0; } static void ping_peer(struct drbd_conf *mdev) @@ -905,35 +908,35 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_requ * @w: work object. * @cancel: The connection will be closed anyways */ -long w_e_end_data_req(struct drbd_work *w, int cancel) +int w_e_end_data_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; - int ok; + int err; if (unlikely(cancel)) { drbd_free_ee(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - ok = !drbd_send_block(mdev, P_DATA_REPLY, peer_req); + err = drbd_send_block(mdev, P_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. 
sector=%llus.\n", (unsigned long long)peer_req->i.sector); - ok = !drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); + err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); } dec_unacked(mdev); move_to_net_ee_or_free(mdev, peer_req); - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_block() failed\n"); - return ok; + return err; } /** @@ -942,16 +945,16 @@ long w_e_end_data_req(struct drbd_work *w, int cancel) * @w: work object. * @cancel: The connection will be closed anyways */ -long w_e_end_rsdata_req(struct drbd_work *w, int cancel) +int w_e_end_rsdata_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; - int ok; + int err; if (unlikely(cancel)) { drbd_free_ee(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } if (get_ldev_if_state(mdev, D_FAILED)) { @@ -960,23 +963,23 @@ long w_e_end_rsdata_req(struct drbd_work *w, int cancel) } if (mdev->state.conn == C_AHEAD) { - ok = !drbd_send_ack(mdev, P_RS_CANCEL, peer_req); + err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req); } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(mdev); - ok = !drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); + err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Not sending RSDataReply, " "partner DISKLESS!\n"); - ok = 1; + err = 0; } } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegRSDReply. 
sector %llus.\n", (unsigned long long)peer_req->i.sector); - ok = !drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); + err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size); @@ -986,24 +989,24 @@ long w_e_end_rsdata_req(struct drbd_work *w, int cancel) move_to_net_ee_or_free(mdev, peer_req); - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_block() failed\n"); - return ok; + return err; } -long w_e_end_csum_rs_req(struct drbd_work *w, int cancel) +int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; struct digest_info *di; int digest_size; void *digest = NULL; - int ok, eq = 0; + int err, eq = 0; if (unlikely(cancel)) { drbd_free_ee(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } if (get_ldev(mdev)) { @@ -1032,16 +1035,16 @@ long w_e_end_csum_rs_req(struct drbd_work *w, int cancel) drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; - ok = !drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); + err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); } else { inc_rs_pending(mdev); peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); - ok = !drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); + err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } } else { - ok = !drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); + err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. 
I guess it gets messy.\n"); } @@ -1049,12 +1052,12 @@ long w_e_end_csum_rs_req(struct drbd_work *w, int cancel) dec_unacked(mdev); move_to_net_ee_or_free(mdev, peer_req); - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_block/ack() failed\n"); - return ok; + return err; } -long w_e_end_ov_req(struct drbd_work *w, int cancel) +int w_e_end_ov_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; @@ -1062,7 +1065,7 @@ long w_e_end_ov_req(struct drbd_work *w, int cancel) unsigned int size = peer_req->i.size; int digest_size; void *digest; - int ok = 1; + int err = 0; if (unlikely(cancel)) goto out; @@ -1070,7 +1073,7 @@ long w_e_end_ov_req(struct drbd_work *w, int cancel) digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { - ok = 0; /* terminate the connection in case the allocation failed */ + err = 1; /* terminate the connection in case the allocation failed */ goto out; } @@ -1087,10 +1090,8 @@ long w_e_end_ov_req(struct drbd_work *w, int cancel) drbd_free_ee(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); - ok = !drbd_send_drequest_csum(mdev, sector, size, - digest, digest_size, - P_OV_REPLY); - if (!ok) + err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY); + if (err) dec_rs_pending(mdev); kfree(digest); @@ -1098,7 +1099,7 @@ out: if (peer_req) drbd_free_ee(mdev, peer_req); dec_unacked(mdev); - return ok; + return err; } void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) @@ -1112,7 +1113,7 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) drbd_set_out_of_sync(mdev, sector, size); } -long w_e_end_ov_reply(struct drbd_work *w, int cancel) +int w_e_end_ov_reply(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf 
*mdev = w->mdev; @@ -1121,12 +1122,12 @@ long w_e_end_ov_reply(struct drbd_work *w, int cancel) sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; int digest_size; - int ok, eq = 0; + int err, eq = 0; if (unlikely(cancel)) { drbd_free_ee(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all @@ -1161,7 +1162,7 @@ long w_e_end_ov_reply(struct drbd_work *w, int cancel) else ov_oos_print(mdev); - ok = !drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, + err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); dec_unacked(mdev); @@ -1177,23 +1178,23 @@ long w_e_end_ov_reply(struct drbd_work *w, int cancel) drbd_resync_finished(mdev); } - return ok; + return err; } -long w_prev_work_done(struct drbd_work *w, int cancel) +int w_prev_work_done(struct drbd_work *w, int cancel) { struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); complete(&b->done); - return 1; + return 0; } -long w_send_barrier(struct drbd_work *w, int cancel) +int w_send_barrier(struct drbd_work *w, int cancel) { struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); struct drbd_conf *mdev = w->mdev; struct p_barrier *p = &mdev->tconn->data.sbuf.barrier; - int ok = 1; + int err = 0; /* really avoid racing with tl_clear. w.cb may have been referenced * just before it was reassigned and re-queued, so double check that. @@ -1205,44 +1206,45 @@ long w_send_barrier(struct drbd_work *w, int cancel) cancel = 1; spin_unlock_irq(&mdev->tconn->req_lock); if (cancel) - return 1; - - if (drbd_get_data_sock(mdev->tconn)) return 0; + + err = drbd_get_data_sock(mdev->tconn); + if (err) + return err; p->barrier = b->br_number; /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. 
*/ - ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, + err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, &p->head, sizeof(*p), 0); drbd_put_data_sock(mdev->tconn); - return ok; + return err; } -long w_send_write_hint(struct drbd_work *w, int cancel) +int w_send_write_hint(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; if (cancel) - return 1; - return !drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); + return 0; + return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } -long w_send_oos(struct drbd_work *w, int cancel) +int w_send_oos(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; - int ok; + int err; if (unlikely(cancel)) { req_mod(req, SEND_CANCELED); - return 1; + return 0; } - ok = !drbd_send_oos(mdev, req); + err = drbd_send_oos(mdev, req); req_mod(req, OOS_HANDED_TO_NETWORK); - return ok; + return err; } /** @@ -1251,21 +1253,21 @@ long w_send_oos(struct drbd_work *w, int cancel) * @w: work object. * @cancel: The connection will be closed anyways */ -long w_send_dblock(struct drbd_work *w, int cancel) +int w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; - int ok; + int err; if (unlikely(cancel)) { req_mod(req, SEND_CANCELED); - return 1; + return 0; } - ok = !drbd_send_dblock(mdev, req); - req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); + err = drbd_send_dblock(mdev, req); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); - return ok; + return err; } /** @@ -1274,26 +1276,26 @@ long w_send_dblock(struct drbd_work *w, int cancel) * @w: work object. 
* @cancel: The connection will be closed anyways */ -long w_send_read_req(struct drbd_work *w, int cancel) +int w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; - int ok; + int err; if (unlikely(cancel)) { req_mod(req, SEND_CANCELED); - return 1; + return 0; } - ok = !drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, + err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); - req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); - return ok; + return err; } -long w_restart_disk_io(struct drbd_work *w, int cancel) +int w_restart_disk_io(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1309,7 +1311,7 @@ long w_restart_disk_io(struct drbd_work *w, int cancel) req->private_bio->bi_bdev = mdev->ldev->backing_bdev; generic_make_request(req->private_bio); - return 1; + return 0; } static int _drbd_may_sync_now(struct drbd_conf *mdev) @@ -1450,7 +1452,7 @@ void start_resync_timer_fn(unsigned long data) drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); } -long w_start_resync(struct drbd_work *w, int cancel) +int w_start_resync(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; @@ -1458,12 +1460,12 @@ long w_start_resync(struct drbd_work *w, int cancel) dev_warn(DEV, "w_start_resync later...\n"); mdev->start_resync_timer.expires = jiffies + HZ/10; add_timer(&mdev->start_resync_timer); - return 1; + return 0; } drbd_start_resync(mdev, C_SYNC_SOURCE); clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); - return 1; + return 0; } /** @@ -1691,7 +1693,7 @@ int drbd_worker(struct drbd_thread *thi) list_del_init(&w->list); spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) { + if 
(w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) { /* dev_warn(DEV, "worker: a callback failed! \n"); */ if (tconn->cstate >= C_WF_REPORT_PARAMS) conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); From e2b3032b902f600980dab9516d12e955c68aa1f4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 17:16:12 +0100 Subject: [PATCH 239/609] drbd: drbd_process_done_ee(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d3e3188575e..03c21e59463 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -409,7 +409,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) LIST_HEAD(work_list); LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; - int ok = 1; + int err = 0; spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); @@ -424,13 +424,17 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) * all ignore the last argument. 
*/ list_for_each_entry_safe(peer_req, t, &work_list, w.list) { + int err2; + /* list_del not necessary, next/prev members not touched */ - ok = !peer_req->w.cb(&peer_req->w, !ok) && ok; + err2 = peer_req->w.cb(&peer_req->w, !!err); + if (!err) + err = err2; drbd_free_ee(mdev, peer_req); } wake_up(&mdev->ee_wait); - return ok; + return err; } void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) @@ -4711,7 +4715,7 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) clear_bit(SIGNAL_ASENDER, &tconn->flags); flush_signals(current); idr_for_each_entry(&tconn->volumes, mdev, i) { - if (!drbd_process_done_ee(mdev)) + if (drbd_process_done_ee(mdev)) return 1; /* error */ } set_bit(SIGNAL_ASENDER, &tconn->flags); From 8172f3e9bf9bc239caec96382573b8d9b01baa09 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 17:22:39 +0100 Subject: [PATCH 240/609] drbd: decode_header(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 03c21e59463..5902d242dca 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -948,7 +948,7 @@ out_release_sockets: return -1; } -static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi) +static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi) { if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { pi->cmd = be16_to_cpu(h->h80.command); @@ -963,9 +963,9 @@ static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct p be32_to_cpu(h->h80.magic), be16_to_cpu(h->h80.command), be16_to_cpu(h->h80.length)); - return false; + return -EINVAL; } - return true; + return 0; } static int drbd_recv_header(struct drbd_tconn *tconn, struct 
packet_info *pi) @@ -980,7 +980,7 @@ static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) return false; } - r = decode_header(tconn, h, pi); + r = !decode_header(tconn, h, pi); tconn->last_received = jiffies; return r; @@ -4845,7 +4845,7 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (!decode_header(tconn, h, &pi)) + if (decode_header(tconn, h, &pi)) goto reconnect; cmd = &asender_tbl[pi.cmd]; if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) { From 69bc7bc351967319bb3db2ecc38e60f9ca198b29 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 17:31:52 +0100 Subject: [PATCH 241/609] drbd: drbd_recv_header(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 38 +++++++++++++++++------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5902d242dca..c07e81827cd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -971,19 +971,21 @@ static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct pa static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_header *h = &tconn->data.rbuf.header; - int r; + int err; - r = drbd_recv(tconn, h, sizeof(*h)); - if (unlikely(r != sizeof(*h))) { + err = drbd_recv(tconn, h, sizeof(*h)); + if (unlikely(err != sizeof(*h))) { if (!signal_pending(current)) - conn_warn(tconn, "short read expecting header on sock: r=%d\n", r); - return false; + conn_warn(tconn, "short read expecting header on sock: r=%d\n", err); + if (err >= 0) + err = -EIO; + return err; } - r = !decode_header(tconn, h, pi); + err = decode_header(tconn, h, pi); tconn->last_received = jiffies; - return r; + return err; } static void drbd_flush(struct drbd_conf *mdev) @@ -3790,7 +3792,7 @@ static int 
receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, goto out; break; } - if (!drbd_recv_header(mdev->tconn, &pi)) + if (drbd_recv_header(mdev->tconn, &pi)) goto out; cmd = pi.cmd; data_size = pi.size; @@ -3941,7 +3943,7 @@ static void drbdd(struct drbd_tconn *tconn) while (get_t_state(&tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(&tconn->receiver); - if (!drbd_recv_header(tconn, &pi)) + if (drbd_recv_header(tconn, &pi)) goto err_out; if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || @@ -4179,8 +4181,8 @@ static int drbd_do_handshake(struct drbd_tconn *tconn) if (err) return 0; - rv = drbd_recv_header(tconn, &pi); - if (!rv) + err = drbd_recv_header(tconn, &pi); + if (err) return 0; if (pi.cmd != P_HAND_SHAKE) { @@ -4254,7 +4256,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) unsigned int resp_size; struct hash_desc desc; struct packet_info pi; - int rv; + int err, rv; desc.tfm = tconn->cram_hmac_tfm; desc.flags = 0; @@ -4273,9 +4275,11 @@ static int drbd_do_auth(struct drbd_tconn *tconn) if (!rv) goto fail; - rv = drbd_recv_header(tconn, &pi); - if (!rv) + err = drbd_recv_header(tconn, &pi); + if (err) { + rv = 0; goto fail; + } if (pi.cmd != P_AUTH_CHALLENGE) { conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n", @@ -4328,9 +4332,11 @@ static int drbd_do_auth(struct drbd_tconn *tconn) if (!rv) goto fail; - rv = drbd_recv_header(tconn, &pi); - if (!rv) + err = drbd_recv_header(tconn, &pi); + if (err) { + rv = 0; goto fail; + } if (pi.cmd != P_AUTH_RESPONSE) { conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n", From fc5be8397fb79b70142e6d32e5d9d063c42c8756 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 17:50:50 +0100 Subject: [PATCH 242/609] drbd: drbd_drain_block(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 22 ++++++++++++---------- 1 file changed, 12 
insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c07e81827cd..929a268689c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1379,30 +1379,32 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, static int drbd_drain_block(struct drbd_conf *mdev, int data_size) { struct page *page; - int rr, rv = 1; + int rr, err = 0; void *data; if (!data_size) - return true; + return 0; page = drbd_pp_alloc(mdev, 1, 1); data = kmap(page); while (data_size) { - rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE)); - if (rr != min_t(int, data_size, PAGE_SIZE)) { - rv = 0; + unsigned int len = min_t(int, data_size, PAGE_SIZE); + + rr = drbd_recv(mdev->tconn, data, len); + if (rr != len) { if (!signal_pending(current)) dev_warn(DEV, "short read receiving data: read %d expected %d\n", - rr, min_t(int, data_size, PAGE_SIZE)); + rr, len); + err = (rr < 0) ? rr : -EIO; break; } data_size -= rr; } kunmap(page); drbd_pp_free(mdev, page, 0); - return rv; + return err; } static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, @@ -1593,7 +1595,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not write resync data to local disk.\n"); - ok = drbd_drain_block(mdev, data_size); + ok = !drbd_drain_block(mdev, data_size); drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); } @@ -1979,7 +1981,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, err = wait_for_and_update_peer_seq(mdev, peer_seq); drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); atomic_inc(&mdev->current_epoch->epoch_size); - return drbd_drain_block(mdev, data_size) && err == 0; + return !drbd_drain_block(mdev, data_size) && err == 0; } /* @@ -2188,7 +2190,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, "no local data.\n"); /* drain 
possibly payload */ - return drbd_drain_block(mdev, digest_size); + return !drbd_drain_block(mdev, digest_size); } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD From 28284ceff043b14defcf8122b765fe0b21bd9344 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 17:54:02 +0100 Subject: [PATCH 243/609] drbd: recv_dless_read(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 929a268689c..cca2da70276 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1426,7 +1426,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n", rr, dgs); - return 0; + return rr < 0 ? rr : -EIO; } } @@ -1450,7 +1450,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, dev_warn(DEV, "short read receiving data reply: " "read %d expected %d\n", rr, expect); - return 0; + return rr < 0 ? rr : -EIO; } data_size -= rr; } @@ -1459,12 +1459,12 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); - return 0; + return -EINVAL; } } D_ASSERT(data_size == 0); - return 1; + return 0; } /* e_end_resync_block() is called via @@ -1565,7 +1565,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. 
* still no race with drbd_fail_pending_reads */ - ok = recv_dless_read(mdev, req, sector, data_size); + ok = !recv_dless_read(mdev, req, sector, data_size); if (ok) req_mod(req, DATA_RECEIVED); From e1c1b0fc8fbda14fd4470ba3a24ce6abdc11436f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 17:58:27 +0100 Subject: [PATCH 244/609] drbd: recv_resync_read(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cca2da70276..28980145b92 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1515,7 +1515,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si atomic_add(data_size >> 9, &mdev->rs_sect_ev); if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) - return true; + return 0; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); @@ -1526,7 +1526,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si drbd_free_ee(mdev, peer_req); fail: put_ldev(mdev); - return false; + return -EIO; } static struct drbd_request * @@ -1590,7 +1590,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, /* data is submitted to disk within recv_resync_read. * corresponding put_ldev done below on error, * or in drbd_peer_request_endio. 
*/ - ok = recv_resync_read(mdev, sector, data_size); + ok = !recv_resync_read(mdev, sector, data_size); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not write resync data to local disk.\n"); From a982dd579cbc99f7c4b81e68b9c99f7a6481f45b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 10 Dec 2010 00:45:25 +0100 Subject: [PATCH 245/609] drbd: send_bitmap_rle_or_plain(): Error handling cleanup Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2e7fd4d326d..6c4454c9b4d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1123,8 +1123,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, { struct p_compressed_bm *p = (void*)h; unsigned long num_words; - int len; - int ok; + int len, err; len = fill_bitmap_rle_bits(mdev, p, c); @@ -1133,7 +1132,8 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (len) { DCBP_set_code(p, RLE_VLI_Bits); - ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h, + err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, + P_COMPRESSED_BITMAP, h, sizeof(*p) + len, 0); c->packets[0]++; @@ -1148,7 +1148,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, len = num_words * sizeof(long); if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); - ok = !_drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, + err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -1159,7 +1159,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (c->bit_offset > c->bm_bits) c->bit_offset = c->bm_bits; } - if (ok) { + if (!err) { if (len == 0) { INFO_bm_xfer_stats(mdev, "send", c); return 0; From 
c696774691c9f9ec238fe3b1892f78796a985852 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Mar 2011 17:15:20 +0100 Subject: [PATCH 246/609] drbd: Add drbd_recv_all(): Receive an entire buffer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 28980145b92..880634264e9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -565,6 +565,19 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) return rv; } +static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size) +{ + int err; + + err = drbd_recv(tconn, buf, size); + if (err != size) { + if (err >= 0) + err = -EIO; + } else + err = 0; + return err; +} + /* quoting tcp(7): * On individual connections, the socket buffer size must be set prior to the * listen(2) or connect(2) calls in order to have it take effect. From 82bc01940ac9142a89b1823c0f2ea2623a92e18d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Mar 2011 12:10:19 +0100 Subject: [PATCH 247/609] drbd: Make all command handlers return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 177 +++++++++++++++-------------- 1 file changed, 93 insertions(+), 84 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 880634264e9..5fc536727af 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1246,7 +1246,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, switch (mdev->write_ordering) { case WO_none: if (rv == FE_RECYCLED) - return true; + return 0; /* receiver context, in the writeout path of the other node. 
* avoid potential distributed deadlock */ @@ -1274,10 +1274,10 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, D_ASSERT(atomic_read(&epoch->active) == 0); D_ASSERT(epoch->flags == 0); - return true; + return 0; default: dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); - return false; + return -EIO; } epoch->flags = 0; @@ -1295,7 +1295,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, } spin_unlock(&mdev->epoch_lock); - return true; + return 0; } /* used from receive_RSDataReply (recv_resync_read) @@ -1564,7 +1564,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, { struct drbd_request *req; sector_t sector; - int ok; + int err; struct p_data *p = &mdev->tconn->data.rbuf.data; sector = be64_to_cpu(p->sector); @@ -1573,27 +1573,26 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__); spin_unlock_irq(&mdev->tconn->req_lock); if (unlikely(!req)) - return false; + return -EIO; /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ - ok = !recv_dless_read(mdev, req, sector, data_size); - - if (ok) + err = recv_dless_read(mdev, req, sector, data_size); + if (!err) req_mod(req, DATA_RECEIVED); /* else: nothing. handled from drbd_disconnect... * I don't think we may complete this just yet * in case we are "on-disconnect: freeze" */ - return ok; + return err; } static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { sector_t sector; - int ok; + int err; struct p_data *p = &mdev->tconn->data.rbuf.data; sector = be64_to_cpu(p->sector); @@ -1603,19 +1602,19 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, /* data is submitted to disk within recv_resync_read. 
* corresponding put_ldev done below on error, * or in drbd_peer_request_endio. */ - ok = !recv_resync_read(mdev, sector, data_size); + err = recv_resync_read(mdev, sector, data_size); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not write resync data to local disk.\n"); - ok = !drbd_drain_block(mdev, data_size); + err = drbd_drain_block(mdev, data_size); drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); } atomic_add(data_size >> 9, &mdev->rs_sect_in); - return ok; + return err; } static int w_restart_write(struct drbd_work *w, int cancel) @@ -1989,12 +1988,16 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, u32 dp_flags; int err; - if (!get_ldev(mdev)) { + int err2; + err = wait_for_and_update_peer_seq(mdev, peer_seq); drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); atomic_inc(&mdev->current_epoch->epoch_size); - return !drbd_drain_block(mdev, data_size) && err == 0; + err2 = drbd_drain_block(mdev, data_size); + if (!err) + err = err2; + return err; } /* @@ -2007,7 +2010,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, peer_req = read_in_block(mdev, p->block_id, sector, data_size); if (!peer_req) { put_ldev(mdev); - return false; + return -EIO; } peer_req->w.cb = e_end_block; @@ -2034,7 +2037,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, spin_unlock_irq(&mdev->tconn->req_lock); if (err == -ENOENT) { put_ldev(mdev); - return true; + return 0; } goto out_interrupted; } @@ -2067,8 +2070,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_al_begin_io(mdev, peer_req->i.sector); } - if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0) - return true; + err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR); + if (!err) + return 0; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); @@ -2083,7 +2087,7 @@ out_interrupted: drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT 
+ EV_CLEANUP); put_ldev(mdev); drbd_free_ee(mdev, peer_req); - return false; + return err; } /* We may throttle resync, if the lower device seems to be busy, @@ -2170,12 +2174,12 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); - return false; + return -EINVAL; } if (sector + (size>>9) > capacity) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); - return false; + return -EINVAL; } if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { @@ -2203,7 +2207,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, "no local data.\n"); /* drain possibly payload */ - return !drbd_drain_block(mdev, digest_size); + return drbd_drain_block(mdev, digest_size); } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD @@ -2212,7 +2216,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); if (!peer_req) { put_ldev(mdev); - return false; + return -ENOMEM; } switch (cmd) { @@ -2325,7 +2329,7 @@ submit: spin_unlock_irq(&mdev->tconn->req_lock); if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0) - return true; + return 0; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); @@ -2337,7 +2341,7 @@ submit: out_free_e: put_ldev(mdev); drbd_free_ee(mdev, peer_req); - return false; + return -EIO; } static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) @@ -2932,9 +2936,11 @@ static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd, if (tconn->agreed_pro_version >= 87) { unsigned char *my_alg = tconn->net_conf->integrity_alg; + int err; - if (drbd_recv(tconn, p_integrity_alg, data_size) != data_size) - return false; 
+ err = drbd_recv_all(tconn, p_integrity_alg, data_size); + if (err) + return err; p_integrity_alg[SHARED_SECRET_MAX-1] = 0; if (strcmp(p_integrity_alg, my_alg)) { @@ -2945,11 +2951,11 @@ static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd, my_alg[0] ? my_alg : (unsigned char *)""); } - return true; + return 0; disconnect: conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - return false; + return -EIO; } /* helper function @@ -2982,7 +2988,6 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int packet_size) { - int ok = true; struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; @@ -2990,6 +2995,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, const int apv = mdev->tconn->agreed_pro_version; int *rs_plan_s = NULL; int fifo_size = 0; + int err; exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? 
sizeof(struct p_rs_param) @@ -3000,7 +3006,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, if (packet_size > exp_max_sz) { dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", packet_size, exp_max_sz); - return false; + return -EIO; } if (apv <= 88) { @@ -3019,8 +3025,9 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size) - return false; + err = drbd_recv_all(mdev->tconn, &p->head.payload, header_size); + if (err) + return err; if (get_ldev(mdev)) { mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); @@ -3033,11 +3040,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, dev_err(DEV, "verify-alg too long, " "peer wants %u, accepting only %u byte\n", data_size, SHARED_SECRET_MAX); - return false; + return -EIO; } - if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size) - return false; + err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size); + if (err) + return err; /* we expect NUL terminated string */ /* but just in case someone tries to be evil */ @@ -3124,8 +3132,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } spin_unlock(&mdev->peer_seq_lock); } + return 0; - return ok; disconnect: /* just for completeness: actually not needed, * as this is not reached if csums_tfm was ok. 
*/ @@ -3133,7 +3141,7 @@ disconnect: /* but free the verify_tfm again, if csums_tfm did not work out */ crypto_free_hash(verify_tfm); conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); - return false; + return -EIO; } /* warn if the arguments differ by more than 12.5% */ @@ -3195,7 +3203,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); mdev->ldev->dc.disk_size = my_usize; put_ldev(mdev); - return false; + return -EIO; } put_ldev(mdev); } @@ -3205,7 +3213,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, dd = drbd_determine_dev_size(mdev, ddsf); put_ldev(mdev); if (dd == dev_size_error) - return false; + return -EIO; drbd_md_sync(mdev); } else { /* I am diskless, need to accept the peer's size. */ @@ -3244,7 +3252,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, } } - return true; + return 0; } static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, @@ -3269,7 +3277,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); - return false; + return -EIO; } if (get_ldev(mdev)) { @@ -3310,7 +3318,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, if (updated_uuids) drbd_print_uuids(mdev, "receiver updated UUIDs to"); - return true; + return 0; } /** @@ -3356,7 +3364,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && mutex_is_locked(mdev->state_mutex)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); - return true; + return 0; } mask = convert_state(mask); @@ -3367,7 +3375,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_md_sync(mdev); - return true; + 
return 0; } static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd, @@ -3383,7 +3391,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd if (test_bit(DISCARD_CONCURRENT, &tconn->flags) && mutex_is_locked(&tconn->cstate_mutex)) { conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG); - return true; + return 0; } mask = convert_state(mask); @@ -3392,7 +3400,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY); conn_send_sr_reply(tconn, rv); - return true; + return 0; } static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, @@ -3438,7 +3446,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, peer_state.conn == C_CONNECTED) { if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) drbd_resync_finished(mdev); - return true; + return 0; } } @@ -3491,10 +3499,10 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, real_peer_disk = D_DISKLESS; } else { if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags)) - return false; + return -EIO; D_ASSERT(os.conn == C_WF_REPORT_PARAMS); conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); - return false; + return -EIO; } } } @@ -3519,7 +3527,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); - return false; + return -EIO; } rv = _drbd_set_state(mdev, ns, cs_flags, NULL); ns = mdev->state; @@ -3527,7 +3535,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, if (rv < SS_SUCCESS) { conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); - return false; + return -EIO; } if (os.conn > C_WF_REPORT_PARAMS) { @@ -3545,7 +3553,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, 
drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ - return true; + return 0; } static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd, @@ -3574,7 +3582,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd, } else dev_err(DEV, "Ignoring SyncUUID packet!\n"); - return true; + return 0; } /** @@ -3597,12 +3605,9 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, } if (want == 0) return 0; - err = drbd_recv(mdev->tconn, buffer, want); - if (err != want) { - if (err >= 0) - err = -EIO; + err = drbd_recv_all(mdev->tconn, buffer, want); + if (err) return err; - } drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); @@ -3751,7 +3756,6 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, struct bm_xfer_ctx c; void *buffer; int err; - int ok = false; struct p_header *h = &mdev->tconn->data.rbuf.header; struct packet_info pi; @@ -3764,6 +3768,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, buffer = (unsigned long *) __get_free_page(GFP_NOIO); if (!buffer) { dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); + err = -ENOMEM; goto out; } @@ -3782,20 +3787,24 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, if (data_size > BM_PACKET_PAYLOAD_BYTES) { dev_err(DEV, "ReportCBitmap packet too large\n"); + err = -EIO; goto out; } /* use the page buff */ p = buffer; memcpy(p, h, sizeof(*h)); - if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size) - goto out; + err = drbd_recv_all(mdev->tconn, p->head.payload, data_size); + if (err) + goto out; if (data_size <= (sizeof(*p) - sizeof(p->head))) { dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); + err = -EIO; goto out; } err = decode_bitmap_c(mdev, p, &c, data_size); } else { dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); + err = -EIO; goto out; } @@ -3807,7 +3816,8 @@ static int 
receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, goto out; break; } - if (drbd_recv_header(mdev->tconn, &pi)) + err = drbd_recv_header(mdev->tconn, &pi); + if (err) goto out; cmd = pi.cmd; data_size = pi.size; @@ -3818,8 +3828,8 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, if (mdev->state.conn == C_WF_BITMAP_T) { enum drbd_state_rv rv; - ok = !drbd_send_bitmap(mdev); - if (!ok) + err = drbd_send_bitmap(mdev); + if (err) goto out; /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); @@ -3830,14 +3840,14 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", drbd_conn_str(mdev->state.conn)); } + err = 0; - ok = true; out: drbd_bm_unlock(mdev); - if (ok && mdev->state.conn == C_WF_BITMAP_S) + if (!err && mdev->state.conn == C_WF_BITMAP_S) drbd_start_resync(mdev, C_SYNC_SOURCE); free_page((unsigned long) buffer); - return ok; + return err; } static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size) @@ -3854,7 +3864,7 @@ static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size) break; size -= r; } - return size == 0; + return size ? 
-EIO : 0; } static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, @@ -3881,7 +3891,7 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd, * with the data requests being unplugged */ drbd_tcp_quickack(mdev->tconn->data.socket); - return true; + return 0; } static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, @@ -3901,7 +3911,7 @@ static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); - return true; + return 0; } struct data_cmd { @@ -3954,7 +3964,7 @@ static void drbdd(struct drbd_tconn *tconn) struct p_header *header = &tconn->data.rbuf.header; struct packet_info pi; size_t shs; /* sub header size */ - int rv; + int err; while (get_t_state(&tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(&tconn->receiver); @@ -3974,34 +3984,33 @@ static void drbdd(struct drbd_tconn *tconn) } if (shs) { - rv = drbd_recv(tconn, &header->payload, shs); - if (unlikely(rv != shs)) { + err = drbd_recv_all(tconn, &header->payload, shs); + if (err) { if (!signal_pending(current)) - conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv); + conn_warn(tconn, "short read while reading sub header: rv=%d\n", err); goto err_out; } } if (drbd_cmd_handler[pi.cmd].fa_type == CONN) { - rv = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs); + err = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs); } else { struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); - rv = mdev ? + err = mdev ? 
drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) : tconn_receive_skip(tconn, pi.cmd, pi.size - shs); } - if (unlikely(!rv)) { + if (unlikely(err)) { conn_err(tconn, "error receiving %s, l: %d!\n", cmdname(pi.cmd), pi.size); goto err_out; } } + return; - if (0) { - err_out: - conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); - } + err_out: + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } void conn_flush_workqueue(struct drbd_tconn *tconn) From 22ab6a30b8225b35b02202ee79dd270ddd09e948 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 13 Dec 2010 01:44:11 +0100 Subject: [PATCH 248/609] drbd: drbd_bm_read() never returns a positive value through drbd_bitmap_io() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d903fb5ea41..f1ec727f7df 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1488,7 +1488,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } else { if (drbd_bitmap_io(mdev, &drbd_bm_read, - "read from attaching", BM_LOCKED_MASK) < 0) { + "read from attaching", BM_LOCKED_MASK)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } From ac29f4039aca8dd98c631c8b5c66f3dd76c2408c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 13 Dec 2010 02:20:47 +0100 Subject: [PATCH 249/609] drbd: _drbd_md_sync_page_io(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index fe60b18c1da..6517c91de4b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -115,7 +115,7 @@ static int _drbd_md_sync_page_io(struct 
drbd_conf *mdev, { struct bio *bio; struct drbd_md_io md_io; - int ok; + int err; md_io.mdev = mdev; init_completion(&md_io.event); @@ -128,8 +128,8 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; bio->bi_sector = sector; - ok = (bio_add_page(bio, page, size, 0) == size); - if (!ok) + err = -EIO; + if (bio_add_page(bio, page, size, 0) != size) goto out; bio->bi_private = &md_io; bio->bi_end_io = drbd_md_io_complete; @@ -140,11 +140,12 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, else submit_bio(rw, bio); wait_for_completion(&md_io.event); - ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; + if (bio_flagged(bio, BIO_UPTODATE)) + err = md_io.error; out: bio_put(bio); - return ok; + return err; } int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, @@ -167,7 +168,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); + ok = !_drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ"); From 3fbf4d21ae9b7a61ce56c80573b64a1cc18c10ce Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 13 Dec 2010 02:25:41 +0100 Subject: [PATCH 250/609] drbd: drbd_md_sync_page_io(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 14 ++++++-------- drivers/block/drbd/drbd_main.c | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 6517c91de4b..586776195a7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -151,7 +151,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int ok; + int err; struct page *iop = mdev->md_io_page; D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); @@ -168,14 +168,12 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = !_drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); - if (unlikely(!ok)) { + err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); + if (err) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ"); - return 0; } - - return ok; + return err; } static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) @@ -392,7 +390,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) if (drbd_bm_write_hinted(mdev)) aw->err = -EIO; /* drbd_chk_io_error done already */ - else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { aw->err = -EIO; drbd_chk_io_error(mdev, 1, true); } else { @@ -439,7 +437,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, /* Dont process error normally, * as this is done before disk is attached! */ - if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) + if (drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6c4454c9b4d..43b9b7a067b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2637,7 +2637,7 @@ void drbd_md_sync(struct drbd_conf *mdev) D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); sector = mdev->ldev->md.md_offset; - if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { /* this was a try anyways ... 
*/ dev_err(DEV, "meta data update failed!\n"); drbd_chk_io_error(mdev, 1, true); @@ -2670,7 +2670,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); - if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { + if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is called BEFORE disk is attached */ dev_err(DEV, "Error while reading metadata.\n"); From 69f5ec728c05e69b09cf37d7b3a1e5f394ec4a4d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 13 Dec 2010 22:50:15 +0100 Subject: [PATCH 251/609] drbd: Remove duplicate initialization Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 79e4a90a6c7..52ff1c7379e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -984,7 +984,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, { enum drbd_fencing_p fp; enum drbd_req_event what = NOTHING; - union drbd_state nsm = (union drbd_state){ .i = -1 }; + union drbd_state nsm; struct sib_info sib; sib.sib_reason = SIB_STATE_CHANGE; From 1097e9a80cc1820501ae01e35f8c2872628fc86f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 17 Dec 2010 23:08:33 +0100 Subject: [PATCH 252/609] drbd: Remove unnecessary assertion This is also checked further below in the same function. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index fa799e372ba..f91f9c9a815 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1010,7 +1010,6 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) */ D_ASSERT(bio->bi_size > 0); D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); - D_ASSERT(bio->bi_idx == 0); /* to make some things easier, force alignment of requests within the * granularity of our hash tables */ From 0da34df0d05d004c1ec40f4c30a325d8727b0b33 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 19 Dec 2010 20:48:29 +0100 Subject: [PATCH 253/609] drbd: drbd_may_do_local_read(): Use bool/true/false Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f91f9c9a815..3430dab8572 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -671,17 +671,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * since size may be bigger than BM_BLOCK_SIZE, * we may need to check several bits. 
*/ -static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) +static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) { unsigned long sbnr, ebnr; sector_t esector, nr_sectors; if (mdev->state.disk == D_UP_TO_DATE) - return 1; + return true; if (mdev->state.disk != D_INCONSISTENT) - return 0; + return false; esector = sector + (size >> 9) - 1; - nr_sectors = drbd_get_capacity(mdev->this_bdev); D_ASSERT(sector < nr_sectors); D_ASSERT(esector < nr_sectors); @@ -689,7 +688,7 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); + return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } /* From 8f7bed77740c7418074e6ba82c646a7dd035e6cf Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 19 Dec 2010 23:53:14 +0100 Subject: [PATCH 254/609] drbd: Rename various functions from *_oos_* to *_out_of_sync_* for clarity Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++---- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 6 +++--- drivers/block/drbd/drbd_req.c | 6 +++--- drivers/block/drbd/drbd_req.h | 2 +- drivers/block/drbd/drbd_worker.c | 14 +++++++------- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 300e85ef94d..837ac36f2d3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1191,7 +1191,7 @@ extern void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_data *dp, int data_size); extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id); -extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); +extern int drbd_send_out_of_sync(struct drbd_conf *, struct drbd_request *); extern 
int drbd_send_block(struct drbd_conf *, enum drbd_packet, struct drbd_peer_request *); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); @@ -1528,10 +1528,10 @@ extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? */ extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); -extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); +extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); -static inline void ov_oos_print(struct drbd_conf *mdev) +static inline void ov_out_of_sync_print(struct drbd_conf *mdev) { if (mdev->ov_last_oos_size) { dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", @@ -1562,7 +1562,7 @@ extern int w_send_read_req(struct drbd_work *, int); extern int w_prev_work_done(struct drbd_work *, int); extern int w_e_reissue(struct drbd_work *, int); extern int w_restart_disk_io(struct drbd_work *, int); -extern int w_send_oos(struct drbd_work *, int); +extern int w_send_out_of_sync(struct drbd_work *, int); extern int w_start_resync(struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 43b9b7a067b..f633c0bd52e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1652,7 +1652,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, return err; } -int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) +int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req) { struct p_block_desc p; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5fc536727af..fc05d961466 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4699,9 +4699,9 @@ static int got_OVResult(struct drbd_conf *mdev, enum 
drbd_packet cmd) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) - drbd_ov_oos_found(mdev, sector, size); + drbd_ov_out_of_sync_found(mdev, sector, size); else - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); if (!get_ldev(mdev)) return true; @@ -4723,7 +4723,7 @@ static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) drbd_queue_work_front(&mdev->tconn->data.work, w); } else { dev_err(DEV, "kmalloc(w) failed."); - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); } } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3430dab8572..e380ffb7f4f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case QUEUE_FOR_SEND_OOS: req->rq_state |= RQ_NET_QUEUED; - req->w.cb = w_send_oos; + req->w.cb = w_send_out_of_sync; drbd_queue_work(&mdev->tconn->data.work, &req->w); break; @@ -786,7 +786,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s } remote = remote && drbd_should_do_remote(mdev->state); - send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); + send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state); D_ASSERT(!(remote && send_oos)); if (!(local || remote) && !is_susp(mdev->state)) { @@ -842,7 +842,7 @@ allocate_barrier: if (remote || send_oos) { remote = drbd_should_do_remote(mdev->state); - send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); + send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state); D_ASSERT(!(remote && send_oos)); if (!(remote || send_oos)) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 8c8c2588c4b..7fb3e06369d 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -305,7 +305,7 @@ static inline bool drbd_should_do_remote(union drbd_state s) That is equivalent since before 96 IO was 
frozen in the C_WF_BITMAP* states. */ } -static inline bool drbd_should_send_oos(union drbd_state s) +static inline bool drbd_should_send_out_of_sync(union drbd_state s) { return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0f0beec9a19..185cd25b18d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -714,7 +714,7 @@ int w_ov_finished(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; kfree(w); - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); return 0; @@ -1102,7 +1102,7 @@ out: return err; } -void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) +void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size) { if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) { mdev->ov_last_oos_size += size>>9; @@ -1158,9 +1158,9 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) * drbd_pp_alloc due to pp_in_use > max_buffers. */ drbd_free_ee(mdev, peer_req); if (!eq) - drbd_ov_oos_found(mdev, sector, size); + drbd_ov_out_of_sync_found(mdev, sector, size); else - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, eq ? 
ID_IN_SYNC : ID_OUT_OF_SYNC); @@ -1174,7 +1174,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) drbd_advance_rs_marks(mdev, mdev->ov_left); if (mdev->ov_left == 0) { - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); } @@ -1230,7 +1230,7 @@ int w_send_write_hint(struct drbd_work *w, int cancel) return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } -int w_send_oos(struct drbd_work *w, int cancel) +int w_send_out_of_sync(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; @@ -1241,7 +1241,7 @@ int w_send_oos(struct drbd_work *w, int cancel) return 0; } - err = drbd_send_oos(mdev, req); + err = drbd_send_out_of_sync(mdev, req); req_mod(req, OOS_HANDED_TO_NETWORK); return err; From 309a834896a4ab7dc762b7f8bb3ed1dd8153162c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 21 Dec 2010 12:38:39 +0100 Subject: [PATCH 255/609] drbd: Get rid of typedef drbd_work_cb This type is not used anywhere else. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 837ac36f2d3..3a046e418aa 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -643,11 +643,9 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) return thi->t_state; } -struct drbd_work; -typedef int (*drbd_work_cb)(struct drbd_work *, int cancel); struct drbd_work { struct list_head list; - drbd_work_cb cb; + int (*cb)(struct drbd_work *, int cancel); union { struct drbd_conf *mdev; struct drbd_tconn *tconn; From a5c31904350182b0138f0bd1cec9bc2c4296f71f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2011 03:28:04 +0100 Subject: [PATCH 256/609] drbd: Introduce and use drbd_recv_all_warn() The pattern of receiving a fixed number of bytes and warning if a short packet is received and the receiver has not actively been interrupted is repeated many times; clean that up.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 113 ++++++++++------------------- 1 file changed, 40 insertions(+), 73 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fc05d961466..208a2ec5493 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -578,6 +578,16 @@ static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size) return err; } +static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size) +{ + int err; + + err = drbd_recv_all(tconn, buf, size); + if (err && !signal_pending(current)) + conn_warn(tconn, "short read (expected size %d)\n", (int)size); + return err; +} + /* quoting tcp(7): * On individual connections, the socket buffer size must be set prior to the * listen(2) or connect(2) calls in order to have it take effect. @@ -986,14 +996,9 @@ static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) struct p_header *h = &tconn->data.rbuf.header; int err; - err = drbd_recv(tconn, h, sizeof(*h)); - if (unlikely(err != sizeof(*h))) { - if (!signal_pending(current)) - conn_warn(tconn, "short read expecting header on sock: r=%d\n", err); - if (err >= 0) - err = -EIO; + err = drbd_recv_all_warn(tconn, h, sizeof(*h)); + if (err) return err; - } err = decode_header(tconn, h, pi); tconn->last_received = jiffies; @@ -1307,7 +1312,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, const sector_t capacity = drbd_get_capacity(mdev->this_bdev); struct drbd_peer_request *peer_req; struct page *page; - int dgs, ds, rr; + int dgs, ds, err; void *dig_in = mdev->tconn->int_dig_in; void *dig_vv = mdev->tconn->int_dig_vv; unsigned long *data; @@ -1316,14 +1321,9 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { - rr = drbd_recv(mdev->tconn, dig_in, dgs); - if (rr != dgs) { - if 
(!signal_pending(current)) - dev_warn(DEV, - "short read receiving data digest: read %d expected %d\n", - rr, dgs); + err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); + if (err) return NULL; - } } data_size -= dgs; @@ -1357,20 +1357,17 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, page_chain_for_each(page) { unsigned len = min_t(int, ds, PAGE_SIZE); data = kmap(page); - rr = drbd_recv(mdev->tconn, data, len); + err = drbd_recv_all_warn(mdev->tconn, data, len); if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { dev_err(DEV, "Fault injection: Corrupting data on receive\n"); data[0] = data[0] ^ (unsigned long)-1; } kunmap(page); - if (rr != len) { + if (err) { drbd_free_ee(mdev, peer_req); - if (!signal_pending(current)) - dev_warn(DEV, "short read receiving data: read %d expected %d\n", - rr, len); return NULL; } - ds -= rr; + ds -= len; } if (dgs) { @@ -1392,7 +1389,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, static int drbd_drain_block(struct drbd_conf *mdev, int data_size) { struct page *page; - int rr, err = 0; + int err = 0; void *data; if (!data_size) @@ -1404,16 +1401,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) while (data_size) { unsigned int len = min_t(int, data_size, PAGE_SIZE); - rr = drbd_recv(mdev->tconn, data, len); - if (rr != len) { - if (!signal_pending(current)) - dev_warn(DEV, - "short read receiving data: read %d expected %d\n", - rr, len); - err = (rr < 0) ? 
rr : -EIO; + err = drbd_recv_all_warn(mdev->tconn, data, len); + if (err) break; - } - data_size -= rr; + data_size -= len; } kunmap(page); drbd_pp_free(mdev, page, 0); @@ -1425,7 +1416,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, { struct bio_vec *bvec; struct bio *bio; - int dgs, rr, i, expect; + int dgs, err, i, expect; void *dig_in = mdev->tconn->int_dig_in; void *dig_vv = mdev->tconn->int_dig_vv; @@ -1433,14 +1424,9 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { - rr = drbd_recv(mdev->tconn, dig_in, dgs); - if (rr != dgs) { - if (!signal_pending(current)) - dev_warn(DEV, - "short read receiving data reply digest: read %d expected %d\n", - rr, dgs); - return rr < 0 ? rr : -EIO; - } + err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); + if (err) + return err; } data_size -= dgs; @@ -1453,19 +1439,13 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, D_ASSERT(sector == bio->bi_sector); bio_for_each_segment(bvec, bio, i) { + void *mapped = kmap(bvec->bv_page) + bvec->bv_offset; expect = min_t(int, data_size, bvec->bv_len); - rr = drbd_recv(mdev->tconn, - kmap(bvec->bv_page)+bvec->bv_offset, - expect); + err = drbd_recv_all_warn(mdev->tconn, mapped, expect); kunmap(bvec->bv_page); - if (rr != expect) { - if (!signal_pending(current)) - dev_warn(DEV, "short read receiving data reply: " - "read %d expected %d\n", - rr, expect); - return rr < 0 ? 
rr : -EIO; - } - data_size -= rr; + if (err) + return err; + data_size -= expect; } if (dgs) { @@ -3984,12 +3964,9 @@ static void drbdd(struct drbd_tconn *tconn) } if (shs) { - err = drbd_recv_all(tconn, &header->payload, shs); - if (err) { - if (!signal_pending(current)) - conn_warn(tconn, "short read while reading sub header: rv=%d\n", err); + err = drbd_recv_all_warn(tconn, &header->payload, shs); + if (err) goto err_out; - } } if (drbd_cmd_handler[pi.cmd].fa_type == CONN) { @@ -4199,7 +4176,7 @@ static int drbd_do_handshake(struct drbd_tconn *tconn) struct p_handshake *p = &tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); struct packet_info pi; - int err, rv; + int err; err = drbd_send_handshake(tconn); if (err) @@ -4221,13 +4198,9 @@ static int drbd_do_handshake(struct drbd_tconn *tconn) return -1; } - rv = drbd_recv(tconn, &p->head.payload, expect); - - if (rv != expect) { - if (!signal_pending(current)) - conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv); + err = drbd_recv_all_warn(tconn, &p->head.payload, expect); + if (err) return 0; - } p->protocol_min = be32_to_cpu(p->protocol_min); p->protocol_max = be32_to_cpu(p->protocol_max); @@ -4325,11 +4298,8 @@ static int drbd_do_auth(struct drbd_tconn *tconn) goto fail; } - rv = drbd_recv(tconn, peers_ch, pi.size); - - if (rv != pi.size) { - if (!signal_pending(current)) - conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv); + err = drbd_recv_all_warn(tconn, peers_ch, pi.size); + if (err) { rv = 0; goto fail; } @@ -4375,11 +4345,8 @@ static int drbd_do_auth(struct drbd_tconn *tconn) goto fail; } - rv = drbd_recv(tconn, response , resp_size); - - if (rv != resp_size) { - if (!signal_pending(current)) - conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv); + err = drbd_recv_all_warn(tconn, response , resp_size); + if (err) { rv = 0; goto fail; } From 0829f5edf37cb1731e97066288f520de85b7b5e5 Mon Sep 17 00:00:00 2001 From: 
Andreas Gruenbacher Date: Thu, 24 Mar 2011 14:31:22 +0100 Subject: [PATCH 257/609] drbd: drbd_connected(): Return an error code upon failure. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 208a2ec5493..6a3e628b68c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -781,7 +781,7 @@ static int drbd_socket_okay(struct socket **sock) int drbd_connected(int vnr, void *p, void *data) { struct drbd_conf *mdev = (struct drbd_conf *)p; - int ok = 1; + int err; atomic_set(&mdev->packet_seq, 0); mdev->peer_seq = 0; @@ -790,15 +790,16 @@ int drbd_connected(int vnr, void *p, void *data) &mdev->tconn->cstate_mutex : &mdev->own_state_mutex; - ok &= !drbd_send_sync_param(mdev); - ok &= !drbd_send_sizes(mdev, 0, 0); - ok &= !drbd_send_uuids(mdev); - ok &= !drbd_send_state(mdev); + err = drbd_send_sync_param(mdev); + if (!err) + err = drbd_send_sizes(mdev, 0, 0); + if (!err) + err = drbd_send_uuids(mdev); + if (!err) + err = drbd_send_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); - - - return !ok; + return err; } /* From 0916e0e308716387f16462f66b53c9128587fd0b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 21 Mar 2011 14:10:15 +0100 Subject: [PATCH 258/609] drbd: Always use the same protocol version for the same peer There is no need to send protocol 80 headers to peers that understand protocol 95 headers. Make sure that we don't send protocol 95 headers until we have agreed upon a protocol version with our peer, though. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f633c0bd52e..a1deab6f130 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -706,7 +706,7 @@ static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int siz static void _prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h, enum drbd_packet cmd, int size) { - if (tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + if (tconn->agreed_pro_version >= 95) prepare_header95(&h->h95, cmd, size); else prepare_header80(&h->h80, cmd, size); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6a3e628b68c..27d4a452064 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -819,9 +819,9 @@ static int drbd_connect(struct drbd_tconn *tconn) return -2; clear_bit(DISCARD_CONCURRENT, &tconn->flags); - tconn->agreed_pro_version = 99; - /* agreed_pro_version must be smaller than 100 so we send the old - header (h80) in the first packet and in the handshake packet. */ + + /* Assume that the peer only understands protocol 80 until we know better. 
*/ + tconn->agreed_pro_version = 80; sock = NULL; msock = NULL; From e307f352b4a6b4411136acd8cab20f9259ba182a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 22 Mar 2011 10:55:48 +0100 Subject: [PATCH 259/609] drbd: Move drbd_send_ping() and drbd_send_ping_ack() to drbd_main.c Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ++----------- drivers/block/drbd/drbd_main.c | 12 ++++++++++++ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3a046e418aa..4f0cafa1887 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1939,17 +1939,8 @@ static inline int drbd_send_short_cmd(struct drbd_conf *mdev, return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, sizeof(h)); } -static inline int drbd_send_ping(struct drbd_tconn *tconn) -{ - struct p_header h; - return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING, &h, sizeof(h)); -} - -static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) -{ - struct p_header h; - return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING_ACK, &h, sizeof(h)); -} +extern int drbd_send_ping(struct drbd_tconn *tconn); +extern int drbd_send_ping_ack(struct drbd_tconn *tconn); static inline int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a1deab6f130..d9828612c8e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -765,6 +765,18 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, return err; } +int drbd_send_ping(struct drbd_tconn *tconn) +{ + struct p_header h; + return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING, &h, sizeof(h)); +} + +int drbd_send_ping_ack(struct drbd_tconn *tconn) +{ + struct p_header h; + return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING_ACK, &h, sizeof(h)); +} + 
int drbd_send_sync_param(struct drbd_conf *mdev) { struct p_rs_param_95 *p; From 058820cdd7e6c96e6656a990af2188059113735e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 22 Mar 2011 16:03:43 +0100 Subject: [PATCH 260/609] drbd: Make _drbd_send_bitmap() static Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4f0cafa1887..d3acc1597dc 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1201,7 +1201,6 @@ extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); extern int drbd_send_bitmap(struct drbd_conf *mdev); -extern int _drbd_send_bitmap(struct drbd_conf *mdev); extern void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); extern int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d9828612c8e..076ccb52e81 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1182,7 +1182,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, } /* See the comment at receive_bitmap() */ -int _drbd_send_bitmap(struct drbd_conf *mdev) +static int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; struct p_header *p; From a02d124091ac7178c24a1318e5dcc4ae7fe6c8e1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 22 Mar 2011 17:20:45 +0100 Subject: [PATCH 261/609] drbd: Rename the DCBP_* functions to dcbp_* and move them to where they are used Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 39 ------------------------------ 
drivers/block/drbd/drbd_main.c | 25 ++++++++++++++++--- drivers/block/drbd/drbd_receiver.c | 21 +++++++++++++--- 3 files changed, 39 insertions(+), 46 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d3acc1597dc..c1eb4462096 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -541,45 +541,6 @@ struct p_delay_probe93 { u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; -/* DCBP: Drbd Compressed Bitmap Packet ... */ -static inline enum drbd_bitmap_code -DCBP_get_code(struct p_compressed_bm *p) -{ - return (enum drbd_bitmap_code)(p->encoding & 0x0f); -} - -static inline void -DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) -{ - BUG_ON(code & ~0xf); - p->encoding = (p->encoding & ~0xf) | code; -} - -static inline int -DCBP_get_start(struct p_compressed_bm *p) -{ - return (p->encoding & 0x80) != 0; -} - -static inline void -DCBP_set_start(struct p_compressed_bm *p, int set) -{ - p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); -} - -static inline int -DCBP_get_pad_bits(struct p_compressed_bm *p) -{ - return (p->encoding >> 4) & 0x7; -} - -static inline void -DCBP_set_pad_bits(struct p_compressed_bm *p, int n) -{ - BUG_ON(n & ~0x7); - p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); -} - /* one bitmap packet, including the p_header, * should fit within one _architecture independend_ page. 
* so we need to use the fixed size 4KiB page size diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 076ccb52e81..dbe5c638723 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1029,6 +1029,23 @@ int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) return !conn_send_cmd(tconn, 0, &tconn->meta, cmd, &p.head, sizeof(p)); } +static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) +{ + BUG_ON(code & ~0xf); + p->encoding = (p->encoding & ~0xf) | code; +} + +static void dcbp_set_start(struct p_compressed_bm *p, int set) +{ + p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); +} + +static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n) +{ + BUG_ON(n & ~0x7); + p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); +} + int fill_bitmap_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, struct bm_xfer_ctx *c) @@ -1073,12 +1090,12 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, if (rl == 0) { /* the first checked bit was set, * store start value, */ - DCBP_set_start(p, 1); + dcbp_set_start(p, 1); /* but skip encoding of zero run length */ toggle = !toggle; continue; } - DCBP_set_start(p, 0); + dcbp_set_start(p, 0); } /* paranoia: catch zero runlength. 
@@ -1118,7 +1135,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, bm_xfer_ctx_bit_to_word_offset(c); /* store pad_bits */ - DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); + dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); return len; } @@ -1143,7 +1160,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, return -EIO; if (len) { - DCBP_set_code(p, RLE_VLI_Bits); + dcbp_set_code(p, RLE_VLI_Bits); err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h, sizeof(*p) + len, 0); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 27d4a452064..1fd871bc889 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3600,6 +3600,21 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, return 1; } +static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p) +{ + return (enum drbd_bitmap_code)(p->encoding & 0x0f); +} + +static int dcbp_get_start(struct p_compressed_bm *p) +{ + return (p->encoding & 0x80) != 0; +} + +static int dcbp_get_pad_bits(struct p_compressed_bm *p) +{ + return (p->encoding >> 4) & 0x7; +} + /** * recv_bm_rle_bits * @@ -3618,11 +3633,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev, u64 tmp; unsigned long s = c->bit_offset; unsigned long e; - int toggle = DCBP_get_start(p); + int toggle = dcbp_get_start(p); int have; int bits; - bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p)); + bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p)); bits = bitstream_get_bits(&bs, &look_ahead, 64); if (bits < 0) @@ -3677,7 +3692,7 @@ decode_bitmap_c(struct drbd_conf *mdev, struct bm_xfer_ctx *c, unsigned int len) { - if (DCBP_get_code(p) == RLE_VLI_Bits) + if (dcbp_get_code(p) == RLE_VLI_Bits) return recv_bm_rle_bits(mdev, p, c, len); /* other variants had been implemented for evaluation, From cb703454a283d8dd5599e928eeea30367ca18874 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 24 Mar 2011 11:03:07 +0100 Subject: [PATCH 262/609] 
drbd: Converted drbd_try_outdate_peer() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_nl.c | 152 ++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 7 +- drivers/block/drbd/drbd_state.c | 85 +++++++++++++--- drivers/block/drbd/drbd_state.h | 5 + include/linux/drbd.h | 3 +- 6 files changed, 156 insertions(+), 100 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c1eb4462096..74637cc1461 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1472,8 +1472,8 @@ extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force); -extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); -extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); +extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); +extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f1ec727f7df..85290a9beb6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -366,116 +366,122 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd) return ret; } -enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) +static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) { - char *ex_to_string; - int r; - enum drbd_disk_state nps; - enum drbd_fencing_p fp; + enum drbd_fencing_p fp = FP_NOT_AVAIL; + struct drbd_conf *mdev; + int vnr; - D_ASSERT(mdev->state.pdsk == D_UNKNOWN); - - if (get_ldev_if_state(mdev, D_CONSISTENT)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } else { - dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); - nps = mdev->state.pdsk; 
- goto out; + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (get_ldev_if_state(mdev, D_CONSISTENT)) { + fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing); + put_ldev(mdev); + } } - r = drbd_khelper(mdev, "fence-peer"); + return fp; +} + +bool conn_try_outdate_peer(struct drbd_tconn *tconn) +{ + union drbd_state mask = { }; + union drbd_state val = { }; + enum drbd_fencing_p fp; + char *ex_to_string; + int r; + + if (tconn->cstate >= C_WF_REPORT_PARAMS) { + conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n"); + return false; + } + + fp = highest_fencing_policy(tconn); + switch (fp) { + case FP_NOT_AVAIL: + conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n"); + goto out; + case FP_DONT_CARE: + return true; + default: ; + } + + r = conn_khelper(tconn, "fence-peer"); switch ((r>>8) & 0xff) { case 3: /* peer is inconsistent */ ex_to_string = "peer is inconsistent or worse"; - nps = D_INCONSISTENT; + mask.pdsk = D_MASK; + val.pdsk = D_INCONSISTENT; break; case 4: /* peer got outdated, or was already outdated */ ex_to_string = "peer was fenced"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; case 5: /* peer was down */ - if (mdev->state.disk == D_UP_TO_DATE) { + if (conn_highest_disk(tconn) == D_UP_TO_DATE) { /* we will(have) create(d) a new UUID anyways... */ ex_to_string = "peer is unreachable, assumed to be dead"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; } else { ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; - nps = mdev->state.pdsk; } break; case 6: /* Peer is primary, voluntarily outdate myself. * This is useful when an unconnected R_SECONDARY is asked to * become R_PRIMARY, but finds the other peer being active. 
*/ ex_to_string = "peer is active"; - dev_warn(DEV, "Peer is primary, outdating myself.\n"); - nps = D_UNKNOWN; - _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE); + conn_warn(tconn, "Peer is primary, outdating myself.\n"); + mask.disk = D_MASK; + val.disk = D_OUTDATED; break; case 7: if (fp != FP_STONITH) - dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n"); + conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; default: /* The script is broken ... */ - nps = D_UNKNOWN; - dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); - return nps; + conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); + return false; /* Eventually leave IO frozen */ } - dev_info(DEV, "fence-peer helper returned %d (%s)\n", - (r>>8) & 0xff, ex_to_string); + conn_info(tconn, "fence-peer helper returned %d (%s)\n", + (r>>8) & 0xff, ex_to_string); -out: - if (mdev->state.susp_fen && nps >= D_UNKNOWN) { - /* The handler was not successful... unfreeze here, the - state engine can not unfreeze... */ - _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE); - } + out: - return nps; + /* Not using + conn_request_state(tconn, mask, val, CS_VERBOSE); + here, because we might were able to re-establish the connection in the + meantime. 
*/ + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate < C_WF_REPORT_PARAMS) + _conn_request_state(tconn, mask, val, CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); + + return conn_highest_pdsk(tconn) <= D_OUTDATED; } static int _try_outdate_peer_async(void *data) { - struct drbd_conf *mdev = (struct drbd_conf *)data; - enum drbd_disk_state nps; - union drbd_state ns; + struct drbd_tconn *tconn = (struct drbd_tconn *)data; - nps = drbd_try_outdate_peer(mdev); - - /* Not using - drbd_request_state(mdev, NS(pdsk, nps)); - here, because we might were able to re-establish the connection - in the meantime. This can only partially be solved in the state's - engine is_valid_state() and is_valid_state_transition() - functions. - - nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN. - pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, - therefore we have to have the pre state change check here. - */ - spin_lock_irq(&mdev->tconn->req_lock); - ns = mdev->state; - if (ns.conn < C_WF_REPORT_PARAMS) { - ns.pdsk = nps; - _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - } - spin_unlock_irq(&mdev->tconn->req_lock); + conn_try_outdate_peer(tconn); return 0; } -void drbd_try_outdate_peer_async(struct drbd_conf *mdev) +void conn_try_outdate_peer_async(struct drbd_tconn *tconn) { struct task_struct *opa; - opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev)); + opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h"); if (IS_ERR(opa)) - dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); + conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n"); } enum drbd_state_rv @@ -486,7 +492,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) int try = 0; int forced = 0; union drbd_state mask, val; - enum drbd_disk_state nps; if (new_role == R_PRIMARY) request_ping(mdev->tconn); /* Detect a dead peer ASAP */ @@ -519,32 +524,23 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role 
new_role, int force) if (rv == SS_NO_UP_TO_DATE_DISK && mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { D_ASSERT(mdev->state.pdsk == D_UNKNOWN); - nps = drbd_try_outdate_peer(mdev); - if (nps == D_OUTDATED || nps == D_INCONSISTENT) { + if (conn_try_outdate_peer(mdev->tconn)) { val.disk = D_UP_TO_DATE; mask.disk = D_MASK; } - - val.pdsk = nps; - mask.pdsk = D_MASK; - continue; } if (rv == SS_NOTHING_TO_DO) goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { - nps = drbd_try_outdate_peer(mdev); - - if (force && nps > D_OUTDATED) { + if (!conn_try_outdate_peer(mdev->tconn) && force) { dev_warn(DEV, "Forced into split brain situation!\n"); - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; + } - - mask.pdsk = D_MASK; - val.pdsk = nps; - continue; } if (rv == SS_TWO_PRIMARIES) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1fd871bc889..91aa49f478e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4030,9 +4030,11 @@ static void drbd_disconnect(struct drbd_tconn *tconn) drbd_free_sock(tconn); idr_for_each(&tconn->volumes, drbd_disconnected, tconn); - conn_info(tconn, "Connection closed\n"); + if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) + conn_try_outdate_peer_async(tconn); + spin_lock_irq(&tconn->req_lock); oc = tconn->cstate; if (oc >= C_UNCONNECTED) @@ -4109,9 +4111,6 @@ static int drbd_disconnected(int vnr, void *p, void *data) put_ldev(mdev); } - if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) - drbd_try_outdate_peer_async(mdev); - /* serialize with bitmap writeout triggered by the state change, * if any. 
*/ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 52ff1c7379e..b4f668db329 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -61,6 +61,73 @@ bool conn_all_vols_unconf(struct drbd_tconn *tconn) return true; } +/* Unfortunately the states where not correctly ordered, when + they where defined. therefore can not use max_t() here. */ +static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_PRIMARY || role2 == R_PRIMARY) + return R_PRIMARY; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_UNKNOWN; +} +static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_UNKNOWN || role2 == R_UNKNOWN) + return R_UNKNOWN; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_PRIMARY; +} + +enum drbd_role conn_highest_role(struct drbd_tconn *tconn) +{ + enum drbd_role role = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + role = max_role(role, mdev->state.role); + + return role; +} + +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn) +{ + enum drbd_role peer = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + peer = max_role(peer, mdev->state.peer); + + return peer; +} + +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.disk); + + return ds; +} + +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk); + + 
return ds; +} + /** * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. @@ -329,18 +396,6 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio dev_info(DEV, "%s\n", pb); } -static bool vol_has_primary_peer(struct drbd_tconn *tconn) -{ - struct drbd_conf *mdev; - int vnr; - - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (mdev->state.peer == R_PRIMARY) - return true; - } - return false; -} - /** * is_valid_state() - Returns an SS_ error code if ns is not valid * @mdev: DRBD device. @@ -364,7 +419,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) { if (ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; - else if (vol_has_primary_peer(mdev->tconn)) + else if (conn_highest_peer(mdev->tconn) == R_PRIMARY) rv = SS_O_VOL_PEER_PRI; } put_net_conf(mdev->tconn); @@ -1390,8 +1445,8 @@ static int _set_state_itr_fn(int vnr, void *p, void *data) rv = __drbd_set_state(mdev, ns, flags, NULL); - ms.role = max_t(enum drbd_role, mdev->state.role, ms.role); - ms.peer = max_t(enum drbd_role, mdev->state.peer, ms.peer); + ms.role = max_role(ns.role, ms.role); + ms.peer = max_role(ns.peer, ms.peer); ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk); ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk); params->ms = ms; diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 55df0728bc8..394a1998acd 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -110,4 +110,9 @@ static inline int drbd_request_state(struct drbd_conf *mdev, return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); } +enum drbd_role conn_highest_role(struct drbd_tconn *tconn); +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); + #endif diff 
--git a/include/linux/drbd.h b/include/linux/drbd.h index 9cdb888607a..60d30881909 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -65,7 +65,8 @@ enum drbd_io_error_p { }; enum drbd_fencing_p { - FP_DONT_CARE, + FP_NOT_AVAIL = -1, /* Not a policy */ + FP_DONT_CARE = 0, FP_RESOURCE, FP_STONITH }; From e6ef8a5cb3fcf5e5529a26d2cab5bd891c7964b1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2011 18:07:54 +0100 Subject: [PATCH 263/609] drbd: Preallocate one page per drbd_socket as a receive buffer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 22 ++++++++++ drivers/block/drbd/drbd_receiver.c | 65 +++++++++++++----------------- 3 files changed, 54 insertions(+), 37 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 74637cc1461..932ba030435 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -554,6 +554,8 @@ struct p_delay_probe93 { #error "PAGE_SIZE too small" #endif +#define DRBD_SOCKET_BUFFER_SIZE 4096 + union p_polymorph { struct p_header header; struct p_handshake handshake; @@ -803,7 +805,7 @@ struct drbd_socket { /* this way we get our * send/receive buffers off the stack */ union p_polymorph sbuf; - union p_polymorph rbuf; + void *rbuf; }; struct drbd_md { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dbe5c638723..004a8ce7f1d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2272,6 +2272,19 @@ found: return tconn; } +static int drbd_alloc_socket(struct drbd_socket *socket) +{ + socket->rbuf = (void *) __get_free_page(GFP_KERNEL); + if (!socket->rbuf) + return -ENOMEM; + return 0; +} + +static void drbd_free_socket(struct drbd_socket *socket) +{ + free_page((unsigned long) socket->rbuf); +} + struct drbd_tconn *drbd_new_tconn(const char *name) { struct drbd_tconn *tconn; @@ -2284,6 +2297,11 @@ 
struct drbd_tconn *drbd_new_tconn(const char *name) if (!tconn->name) goto fail; + if (drbd_alloc_socket(&tconn->data)) + goto fail; + if (drbd_alloc_socket(&tconn->meta)) + goto fail; + if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) goto fail; @@ -2322,6 +2340,8 @@ struct drbd_tconn *drbd_new_tconn(const char *name) fail: tl_cleanup(tconn); free_cpumask_var(tconn->cpu_mask); + drbd_free_socket(&tconn->meta); + drbd_free_socket(&tconn->data); kfree(tconn->name); kfree(tconn); @@ -2334,6 +2354,8 @@ void drbd_free_tconn(struct drbd_tconn *tconn) idr_destroy(&tconn->volumes); free_cpumask_var(tconn->cpu_mask); + drbd_free_socket(&tconn->meta); + drbd_free_socket(&tconn->data); kfree(tconn->name); kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 91aa49f478e..50e837e3bee 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -743,7 +743,7 @@ static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock) { - struct p_header80 *h = &tconn->data.rbuf.header.h80; + struct p_header80 *h = tconn->data.rbuf; int rr; rr = drbd_recv_short(sock, h, sizeof(*h), 0); @@ -994,7 +994,7 @@ static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct pa static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_header *h = &tconn->data.rbuf.header; + struct p_header *h = tconn->data.rbuf; int err; err = drbd_recv_all_warn(tconn, h, sizeof(*h)); @@ -1236,7 +1236,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { int rv; - struct p_barrier *p = &mdev->tconn->data.rbuf.barrier; + struct p_barrier *p = mdev->tconn->data.rbuf; struct drbd_epoch *epoch; inc_unacked(mdev); @@ -1546,7 +1546,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum 
drbd_packet cmd, struct drbd_request *req; sector_t sector; int err; - struct p_data *p = &mdev->tconn->data.rbuf.data; + struct p_data *p = mdev->tconn->data.rbuf; sector = be64_to_cpu(p->sector); @@ -1574,7 +1574,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, { sector_t sector; int err; - struct p_data *p = &mdev->tconn->data.rbuf.data; + struct p_data *p = mdev->tconn->data.rbuf; sector = be64_to_cpu(p->sector); D_ASSERT(p->block_id == ID_SYNCER); @@ -1963,7 +1963,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, { sector_t sector; struct drbd_peer_request *peer_req; - struct p_data *p = &mdev->tconn->data.rbuf.data; + struct p_data *p = mdev->tconn->data.rbuf; u32 peer_seq = be32_to_cpu(p->seq_num); int rw = WRITE; u32 dp_flags; @@ -2147,7 +2147,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, struct digest_info *di = NULL; int size, verb; unsigned int fault_type; - struct p_block_req *p = &mdev->tconn->data.rbuf.block_req; + struct p_block_req *p = mdev->tconn->data.rbuf; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -2867,7 +2867,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size) { - struct p_protocol *p = &tconn->data.rbuf.protocol; + struct p_protocol *p = tconn->data.rbuf; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; @@ -2969,7 +2969,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int packet_size) { - struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95; + struct p_rs_param_95 *p = mdev->tconn->data.rbuf; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; 
struct crypto_hash *csums_tfm = NULL; @@ -3141,7 +3141,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { - struct p_sizes *p = &mdev->tconn->data.rbuf.sizes; + struct p_sizes *p = mdev->tconn->data.rbuf; enum determine_dev_size dd = unchanged; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ @@ -3239,7 +3239,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { - struct p_uuids *p = &mdev->tconn->data.rbuf.uuids; + struct p_uuids *p = mdev->tconn->data.rbuf; u64 *p_uuid; int i, updated_uuids = 0; @@ -3335,7 +3335,7 @@ static union drbd_state convert_state(union drbd_state ps) static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { - struct p_req_state *p = &mdev->tconn->data.rbuf.req_state; + struct p_req_state *p = mdev->tconn->data.rbuf; union drbd_state mask, val; enum drbd_state_rv rv; @@ -3362,7 +3362,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size) { - struct p_req_state *p = &tconn->data.rbuf.req_state; + struct p_req_state *p = tconn->data.rbuf; union drbd_state mask, val; enum drbd_state_rv rv; @@ -3387,7 +3387,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { - struct p_state *p = &mdev->tconn->data.rbuf.state; + struct p_state *p = mdev->tconn->data.rbuf; union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags cs_flags; @@ -3540,7 +3540,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, static int receive_sync_uuid(struct 
drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { - struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid; + struct p_rs_uuid *p = mdev->tconn->data.rbuf; wait_event(mdev->misc_wait, mdev->state.conn == C_WF_SYNC_UUID || @@ -3752,7 +3752,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, struct bm_xfer_ctx c; void *buffer; int err; - struct p_header *h = &mdev->tconn->data.rbuf.header; + struct p_header *h = mdev->tconn->data.rbuf; struct packet_info pi; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); @@ -3893,7 +3893,7 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd, static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { - struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc; + struct p_block_desc *p = mdev->tconn->data.rbuf; switch (mdev->state.conn) { case C_WF_SYNC_UUID: @@ -3948,16 +3948,9 @@ static struct data_cmd drbd_cmd_handler[] = { [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } }, }; -/* All handler functions that expect a sub-header get that sub-heder in - mdev->tconn->data.rbuf.header.head.payload. - - Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual - p_header, but they may not rely on that. Since there is also p_header95 ! - */ - static void drbdd(struct drbd_tconn *tconn) { - struct p_header *header = &tconn->data.rbuf.header; + struct p_header *header = tconn->data.rbuf; struct packet_info pi; size_t shs; /* sub header size */ int err; @@ -4188,7 +4181,7 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) static int drbd_do_handshake(struct drbd_tconn *tconn) { /* ASSERT current == tconn->receiver ... 
*/ - struct p_handshake *p = &tconn->data.rbuf.handshake; + struct p_handshake *p = tconn->data.rbuf; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); struct packet_info pi; int err; @@ -4435,7 +4428,7 @@ int drbdd_init(struct drbd_thread *thi) static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd) { - struct p_req_state_reply *p = &tconn->meta.rbuf.req_state_reply; + struct p_req_state_reply *p = tconn->meta.rbuf; int retcode = be32_to_cpu(p->retcode); if (retcode >= SS_SUCCESS) { @@ -4452,7 +4445,7 @@ static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd) static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) { - struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; + struct p_req_state_reply *p = mdev->tconn->meta.rbuf; int retcode = be32_to_cpu(p->retcode); if (retcode >= SS_SUCCESS) { @@ -4485,7 +4478,7 @@ static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd) static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd) { - struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; + struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); @@ -4530,7 +4523,7 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) { - struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; + struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); enum drbd_req_event what; @@ -4575,7 +4568,7 @@ static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd) { - struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; + struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); int size = 
be32_to_cpu(p->blksize); bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || @@ -4608,7 +4601,7 @@ static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd) static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd) { - struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; + struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4625,7 +4618,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd) { sector_t sector; int size; - struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; + struct p_block_ack *p = mdev->tconn->meta.rbuf; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4654,7 +4647,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd) static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) { - struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack; + struct p_barrier_ack *p = mdev->tconn->meta.rbuf; tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size)); @@ -4670,7 +4663,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) { - struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; + struct p_block_ack *p = mdev->tconn->meta.rbuf; struct drbd_work *w; sector_t sector; int size; @@ -4776,7 +4769,7 @@ static struct asender_cmd asender_tbl[] = { int drbd_asender(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; - struct p_header *h = &tconn->meta.rbuf.header; + struct p_header *h = tconn->meta.rbuf; struct asender_cmd *cmd = NULL; struct packet_info pi; int rv; From fc56815c81ee12f8d5ce9af952159056556ea8ab Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2011 21:23:50 +0100 Subject: [PATCH 264/609] drbd: receive_bitmap(): Use the pre-allocated receive buffer Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 50e837e3bee..59e5a50332d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3574,8 +3574,9 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd, */ static int receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, - unsigned long *buffer, struct bm_xfer_ctx *c) + struct p_header *h, struct bm_xfer_ctx *c) { + unsigned long *buffer = (unsigned long *)h->payload; unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); unsigned want = num_words * sizeof(long); int err; @@ -3750,7 +3751,6 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { struct bm_xfer_ctx c; - void *buffer; int err; struct p_header *h = mdev->tconn->data.rbuf; struct packet_info pi; @@ -3759,15 +3759,6 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, /* you are supposed to send additional out-of-sync information * if you actually set bits during this phase */ - /* maybe we should use some per thread scratch page, - * and allocate that during initial device creation? */ - buffer = (unsigned long *) __get_free_page(GFP_NOIO); - if (!buffer) { - dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); - err = -ENOMEM; - goto out; - } - c = (struct bm_xfer_ctx) { .bm_bits = drbd_bm_bits(mdev), .bm_words = drbd_bm_words(mdev), @@ -3775,7 +3766,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, for(;;) { if (cmd == P_BITMAP) { - err = receive_bitmap_plain(mdev, data_size, buffer, &c); + err = receive_bitmap_plain(mdev, data_size, h, &c); } else if (cmd == P_COMPRESSED_BITMAP) { /* MAYBE: sanity check that we speak proto >= 90, * and the feature is enabled! 
*/ @@ -3786,9 +3777,8 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, err = -EIO; goto out; } - /* use the page buff */ - p = buffer; - memcpy(p, h, sizeof(*h)); + + p = mdev->tconn->data.rbuf; err = drbd_recv_all(mdev->tconn, p->head.payload, data_size); if (err) goto out; @@ -3842,7 +3832,6 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_bm_unlock(mdev); if (!err && mdev->state.conn == C_WF_BITMAP_S) drbd_start_resync(mdev, C_SYNC_SOURCE); - free_page((unsigned long) buffer); return err; } From 5a87d920f38fcafb790ddd03f0d8d1db56b268a8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2011 21:17:52 +0100 Subject: [PATCH 265/609] drbd: Preallocate one page per drbd_socket as a send buffer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 23 +---------------------- drivers/block/drbd/drbd_main.c | 6 +++++- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 9 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 932ba030435..9f1f0f56f71 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -556,27 +556,6 @@ struct p_delay_probe93 { #define DRBD_SOCKET_BUFFER_SIZE 4096 -union p_polymorph { - struct p_header header; - struct p_handshake handshake; - struct p_data data; - struct p_block_ack block_ack; - struct p_barrier barrier; - struct p_barrier_ack barrier_ack; - struct p_rs_param_89 rs_param_89; - struct p_rs_param_95 rs_param_95; - struct p_protocol protocol; - struct p_sizes sizes; - struct p_uuids uuids; - struct p_state state; - struct p_req_state req_state; - struct p_req_state_reply req_state_reply; - struct p_block_req block_req; - struct p_delay_probe93 delay_probe93; - struct p_rs_uuid rs_uuid; - struct p_block_desc block_desc; -} __packed; - 
/**********************************************************************/ enum drbd_thread_state { NONE, @@ -804,7 +783,7 @@ struct drbd_socket { struct socket *socket; /* this way we get our * send/receive buffers off the stack */ - union p_polymorph sbuf; + void *sbuf; void *rbuf; }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 004a8ce7f1d..16ec6c3bd7d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -800,7 +800,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev) enum drbd_packet cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; - p = &mdev->tconn->data.sbuf.rs_param_95; + p = mdev->tconn->data.sbuf; /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); @@ -2277,11 +2277,15 @@ static int drbd_alloc_socket(struct drbd_socket *socket) socket->rbuf = (void *) __get_free_page(GFP_KERNEL); if (!socket->rbuf) return -ENOMEM; + socket->sbuf = (void *) __get_free_page(GFP_KERNEL); + if (!socket->sbuf) + return -ENOMEM; return 0; } static void drbd_free_socket(struct drbd_socket *socket) { + free_page((unsigned long) socket->sbuf); free_page((unsigned long) socket->rbuf); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 59e5a50332d..e75eb270263 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -736,7 +736,7 @@ out: static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd) { - struct p_header *h = &tconn->data.sbuf.header; + struct p_header *h = tconn->data.sbuf; return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); } @@ -4138,7 +4138,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) static int drbd_send_handshake(struct drbd_tconn *tconn) { /* ASSERT current == mdev->tconn->receiver ... 
*/ - struct p_handshake *p = &tconn->data.sbuf.handshake; + struct p_handshake *p = tconn->data.sbuf; int err; if (mutex_lock_interruptible(&tconn->data.mutex)) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 185cd25b18d..cbbc5c7cbef 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1193,7 +1193,7 @@ int w_send_barrier(struct drbd_work *w, int cancel) { struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); struct drbd_conf *mdev = w->mdev; - struct p_barrier *p = &mdev->tconn->data.sbuf.barrier; + struct p_barrier *p = mdev->tconn->data.sbuf; int err = 0; /* really avoid racing with tl_clear. w.cb may have been referenced From 79ed9bd0532e672fde29281ca2801e6f70307205 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2011 21:31:38 +0100 Subject: [PATCH 266/609] drbd: _drbd_send_bitmap(): Use the pre-allocated send buffer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 16ec6c3bd7d..3e32836db55 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -790,9 +790,6 @@ int drbd_send_sync_param(struct drbd_conf *mdev) : apv <= 94 ? sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); - /* used from admin command context and receiver/worker context. - * to avoid kmalloc, grab the socket right here, - * then use the pre-allocated sbuf there */ mutex_lock(&mdev->tconn->data.mutex); sock = mdev->tconn->data.socket; @@ -1147,10 +1144,9 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, * code upon failure. 
*/ static int -send_bitmap_rle_or_plain(struct drbd_conf *mdev, - struct p_header *h, struct bm_xfer_ctx *c) +send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) { - struct p_compressed_bm *p = (void*)h; + struct p_compressed_bm *p = mdev->tconn->data.sbuf; unsigned long num_words; int len, err; @@ -1162,7 +1158,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (len) { dcbp_set_code(p, RLE_VLI_Bits); err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, - P_COMPRESSED_BITMAP, h, + P_COMPRESSED_BITMAP, &p->head, sizeof(*p) + len, 0); c->packets[0]++; @@ -1173,10 +1169,12 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, } else { /* was not compressible. * send a buffer full of plain text bits instead. */ + struct p_header *h = mdev->tconn->data.sbuf; num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); len = num_words * sizeof(long); if (len) - drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); + drbd_bm_get_lel(mdev, c->word_offset, num_words, + (unsigned long *)h->payload); err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; @@ -1202,20 +1200,11 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, static int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; - struct p_header *p; int err; if (!expect(mdev->bitmap)) return false; - /* maybe we should use some per thread scratch page, - * and allocate that during initial device creation? 
*/ - p = (struct p_header *) __get_free_page(GFP_NOIO); - if (!p) { - dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); - return false; - } - if (get_ldev(mdev)) { if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); @@ -1239,10 +1228,9 @@ static int _drbd_send_bitmap(struct drbd_conf *mdev) }; do { - err = send_bitmap_rle_or_plain(mdev, p, &c); + err = send_bitmap_rle_or_plain(mdev, &c); } while (err > 0); - free_page((unsigned long) p); return err == 0; } From deebe195798ef1f19df8c509414d936da8b38d6a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Mar 2011 00:01:04 +0100 Subject: [PATCH 267/609] drbd: A small cleanup in drbdd() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e75eb270263..c6638c4b062 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3945,18 +3945,20 @@ static void drbdd(struct drbd_tconn *tconn) int err; while (get_t_state(&tconn->receiver) == RUNNING) { + struct data_cmd *cmd; + drbd_thread_current_set_cpu(&tconn->receiver); if (drbd_recv_header(tconn, &pi)) goto err_out; - if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || - !drbd_cmd_handler[pi.cmd].mdev_fn)) { + cmd = &drbd_cmd_handler[pi.cmd]; + if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->mdev_fn)) { conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } - shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header); - if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) { + shs = cmd->pkt_size - sizeof(struct p_header); + if (pi.size - shs > 0 && !cmd->expect_payload) { conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } @@ 
-3967,12 +3969,12 @@ static void drbdd(struct drbd_tconn *tconn) goto err_out; } - if (drbd_cmd_handler[pi.cmd].fa_type == CONN) { - err = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs); + if (cmd->fa_type == CONN) { + err = cmd->conn_fn(tconn, pi.cmd, pi.size - shs); } else { struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); err = mdev ? - drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) : + cmd->mdev_fn(mdev, pi.cmd, pi.size - shs) : tconn_receive_skip(tconn, pi.cmd, pi.size - shs); } From 49ba9b1bb3295fa690ae8f5091b093e61acf3ada Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Mar 2011 00:35:45 +0100 Subject: [PATCH 268/609] drbd: Remove useless error messages These messages can only trigger in case there is a pretty obvious internal programming error. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c6638c4b062..edad6acb1d3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2180,8 +2180,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC); break; default: - dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", - cmdname(cmd)); + BUG(); } if (verb && __ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not satisfy peer's read request, " @@ -2267,10 +2266,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, break; default: - dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", - cmdname(cmd)); - fault_type = DRBD_FAULT_MAX; - goto out_free_e; + BUG(); } /* Throttle, drbd_rs_begin_io and submit should become asynchronous From e28572167cfd882b16c12992bbcbbca3c4fa59fb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Mar 
2011 00:57:38 +0100 Subject: [PATCH 269/609] drbd: Pass struct packet_info down to the receive functions Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 150 +++++++++++++---------------- 1 file changed, 65 insertions(+), 85 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index edad6acb1d3..cc729c4982c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -50,8 +50,8 @@ struct packet_info { enum drbd_packet cmd; - int size; - int vnr; + unsigned int size; + unsigned int vnr; }; enum finish_epoch { @@ -1540,8 +1540,7 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, return NULL; } -static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_DataReply(struct drbd_conf *mdev, struct packet_info *pi) { struct drbd_request *req; sector_t sector; @@ -1559,7 +1558,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ - err = recv_dless_read(mdev, req, sector, data_size); + err = recv_dless_read(mdev, req, sector, pi->size); if (!err) req_mod(req, DATA_RECEIVED); /* else: nothing. handled from drbd_disconnect... @@ -1569,8 +1568,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, return err; } -static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_RSDataReply(struct drbd_conf *mdev, struct packet_info *pi) { sector_t sector; int err; @@ -1583,17 +1581,17 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, /* data is submitted to disk within recv_resync_read. * corresponding put_ldev done below on error, * or in drbd_peer_request_endio. 
*/ - err = recv_resync_read(mdev, sector, data_size); + err = recv_resync_read(mdev, sector, pi->size); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not write resync data to local disk.\n"); - err = drbd_drain_block(mdev, data_size); + err = drbd_drain_block(mdev, pi->size); - drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); + drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size); } - atomic_add(data_size >> 9, &mdev->rs_sect_in); + atomic_add(pi->size >> 9, &mdev->rs_sect_in); return err; } @@ -1958,8 +1956,7 @@ static int handle_write_conflicts(struct drbd_conf *mdev, } /* mirrored write */ -static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_Data(struct drbd_conf *mdev, struct packet_info *pi) { sector_t sector; struct drbd_peer_request *peer_req; @@ -1973,9 +1970,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, int err2; err = wait_for_and_update_peer_seq(mdev, peer_seq); - drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); + drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size); atomic_inc(&mdev->current_epoch->epoch_size); - err2 = drbd_drain_block(mdev, data_size); + err2 = drbd_drain_block(mdev, pi->size); if (!err) err = err2; return err; @@ -1988,7 +1985,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, */ sector = be64_to_cpu(p->sector); - peer_req = read_in_block(mdev, p->block_id, sector, data_size); + peer_req = read_in_block(mdev, p->block_id, sector, pi->size); if (!peer_req) { put_ldev(mdev); return -EIO; @@ -2138,8 +2135,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) } -static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int digest_size) +static int receive_DataRequest(struct drbd_conf *mdev, struct packet_info *pi) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -2165,7 +2161,7 @@ static int receive_DataRequest(struct drbd_conf 
*mdev, enum drbd_packet cmd, if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { verb = 1; - switch (cmd) { + switch (pi->cmd) { case P_DATA_REQUEST: drbd_send_ack_rp(mdev, P_NEG_DREPLY, p); break; @@ -2187,7 +2183,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, "no local data.\n"); /* drain possibly payload */ - return drbd_drain_block(mdev, digest_size); + return drbd_drain_block(mdev, pi->size); } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD @@ -2199,7 +2195,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, return -ENOMEM; } - switch (cmd) { + switch (pi->cmd) { case P_DATA_REQUEST: peer_req->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; @@ -2216,25 +2212,25 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, case P_OV_REPLY: case P_CSUM_RS_REQUEST: fault_type = DRBD_FAULT_RS_RD; - di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO); + di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); if (!di) goto out_free_e; - di->digest_size = digest_size; + di->digest_size = pi->size; di->digest = (((char *)di)+sizeof(struct digest_info)); peer_req->digest = di; peer_req->flags |= EE_HAS_DIGEST; - if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size) + if (drbd_recv_all(mdev->tconn, di->digest, pi->size)) goto out_free_e; - if (cmd == P_CSUM_RS_REQUEST) { + if (pi->cmd == P_CSUM_RS_REQUEST) { D_ASSERT(mdev->tconn->agreed_pro_version >= 89); peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); - } else if (cmd == P_OV_REPLY) { + } else if (pi->cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ atomic_add(size >> 9, &mdev->rs_sect_in); peer_req->w.cb = w_e_end_ov_reply; @@ -2860,8 +2856,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet 
cmd, - unsigned int data_size) +static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_protocol *p = tconn->data.rbuf; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; @@ -2915,7 +2910,7 @@ static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned char *my_alg = tconn->net_conf->integrity_alg; int err; - err = drbd_recv_all(tconn, p_integrity_alg, data_size); + err = drbd_recv_all(tconn, p_integrity_alg, pi->size); if (err) return err; @@ -2962,8 +2957,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int packet_size) +static int receive_SyncParam(struct drbd_conf *mdev, struct packet_info *pi) { struct p_rs_param_95 *p = mdev->tconn->data.rbuf; unsigned int header_size, data_size, exp_max_sz; @@ -2980,22 +2974,22 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, : apv <= 94 ? 
sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); - if (packet_size > exp_max_sz) { + if (pi->size > exp_max_sz) { dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", - packet_size, exp_max_sz); + pi->size, exp_max_sz); return -EIO; } if (apv <= 88) { header_size = sizeof(struct p_rs_param) - sizeof(struct p_header); - data_size = packet_size - header_size; + data_size = pi->size - header_size; } else if (apv <= 94) { header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header); - data_size = packet_size - header_size; + data_size = pi->size - header_size; D_ASSERT(data_size == 0); } else { header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header); - data_size = packet_size - header_size; + data_size = pi->size - header_size; D_ASSERT(data_size == 0); } @@ -3134,8 +3128,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_sizes(struct drbd_conf *mdev, struct packet_info *pi) { struct p_sizes *p = mdev->tconn->data.rbuf; enum determine_dev_size dd = unchanged; @@ -3232,8 +3225,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, return 0; } -static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_uuids(struct drbd_conf *mdev, struct packet_info *pi) { struct p_uuids *p = mdev->tconn->data.rbuf; u64 *p_uuid; @@ -3328,8 +3320,7 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_req_state(struct drbd_conf *mdev, struct packet_info *pi) { struct p_req_state *p = mdev->tconn->data.rbuf; union drbd_state mask, val; @@ -3355,8 +3346,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum 
drbd_packet cmd, return 0; } -static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd, - unsigned int data_size) +static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_req_state *p = tconn->data.rbuf; union drbd_state mask, val; @@ -3380,8 +3370,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd return 0; } -static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_state(struct drbd_conf *mdev, struct packet_info *pi) { struct p_state *p = mdev->tconn->data.rbuf; union drbd_state os, ns, peer_state; @@ -3533,8 +3522,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, return 0; } -static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_sync_uuid(struct drbd_conf *mdev, struct packet_info *pi) { struct p_rs_uuid *p = mdev->tconn->data.rbuf; @@ -3743,13 +3731,11 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. 
*/ -static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_bitmap(struct drbd_conf *mdev, struct packet_info *pi) { struct bm_xfer_ctx c; int err; struct p_header *h = mdev->tconn->data.rbuf; - struct packet_info pi; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information @@ -3761,48 +3747,46 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, }; for(;;) { - if (cmd == P_BITMAP) { - err = receive_bitmap_plain(mdev, data_size, h, &c); - } else if (cmd == P_COMPRESSED_BITMAP) { + if (pi->cmd == P_BITMAP) { + err = receive_bitmap_plain(mdev, pi->size, h, &c); + } else if (pi->cmd == P_COMPRESSED_BITMAP) { /* MAYBE: sanity check that we speak proto >= 90, * and the feature is enabled! */ struct p_compressed_bm *p; - if (data_size > BM_PACKET_PAYLOAD_BYTES) { + if (pi->size > BM_PACKET_PAYLOAD_BYTES) { dev_err(DEV, "ReportCBitmap packet too large\n"); err = -EIO; goto out; } p = mdev->tconn->data.rbuf; - err = drbd_recv_all(mdev->tconn, p->head.payload, data_size); + err = drbd_recv_all(mdev->tconn, p->head.payload, pi->size); if (err) goto out; - if (data_size <= (sizeof(*p) - sizeof(p->head))) { - dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); + if (pi->size <= (sizeof(*p) - sizeof(p->head))) { + dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size); err = -EIO; goto out; } - err = decode_bitmap_c(mdev, p, &c, data_size); + err = decode_bitmap_c(mdev, p, &c, pi->size); } else { - dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); + dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); err = -EIO; goto out; } - c.packets[cmd == P_BITMAP]++; - c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size; + c.packets[pi->cmd == P_BITMAP]++; + c.bytes[pi->cmd == P_BITMAP] += sizeof(struct p_header) + 
pi->size; if (err <= 0) { if (err < 0) goto out; break; } - err = drbd_recv_header(mdev->tconn, &pi); + err = drbd_recv_header(mdev->tconn, pi); if (err) goto out; - cmd = pi.cmd; - data_size = pi.size; } INFO_bm_xfer_stats(mdev, "receive", &c); @@ -3848,25 +3832,23 @@ static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size) return size ? -EIO : 0; } -static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_skip(struct drbd_conf *mdev, struct packet_info *pi) { dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", - cmd, data_size); + pi->cmd, pi->size); - return _tconn_receive_skip(mdev->tconn, data_size); + return _tconn_receive_skip(mdev->tconn, pi->size); } -static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size) +static int tconn_receive_skip(struct drbd_tconn *tconn, struct packet_info *pi) { conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n", - cmd, data_size); + pi->cmd, pi->size); - return _tconn_receive_skip(tconn, data_size); + return _tconn_receive_skip(tconn, pi->size); } -static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_UnplugRemote(struct drbd_conf *mdev, struct packet_info *pi) { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ @@ -3875,8 +3857,7 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd, return 0; } -static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_out_of_sync(struct drbd_conf *mdev, struct packet_info *pi) { struct p_block_desc *p = mdev->tconn->data.rbuf; @@ -3900,10 +3881,8 @@ struct data_cmd { size_t pkt_size; enum mdev_or_conn fa_type; /* first argument's type */ union { - int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd, - unsigned int 
to_receive); - int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd, - unsigned int to_receive); + int (*mdev_fn)(struct drbd_conf *, struct packet_info *); + int (*conn_fn)(struct drbd_tconn *, struct packet_info *); }; }; @@ -3963,15 +3942,16 @@ static void drbdd(struct drbd_tconn *tconn) err = drbd_recv_all_warn(tconn, &header->payload, shs); if (err) goto err_out; + pi.size -= shs; } - if (cmd->fa_type == CONN) { - err = cmd->conn_fn(tconn, pi.cmd, pi.size - shs); - } else { + if (cmd->fa_type == CONN) + err = cmd->conn_fn(tconn, &pi); + else { struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); err = mdev ? - cmd->mdev_fn(mdev, pi.cmd, pi.size - shs) : - tconn_receive_skip(tconn, pi.cmd, pi.size - shs); + cmd->mdev_fn(mdev, &pi) : + tconn_receive_skip(tconn, &pi); } if (unlikely(err)) { From 4a76b1612f35d8c3e5c3e77e10c7d72eed79dfe0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Mar 2011 02:43:51 +0100 Subject: [PATCH 270/609] drbd: Map from (connection, volume number) to device in the receive handlers The receive handlers do not all handle unknown volume numbers the same way. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 262 ++++++++++++++++++----------- 1 file changed, 164 insertions(+), 98 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cc729c4982c..3243c784991 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1232,13 +1232,17 @@ static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, wake_up(&mdev->misc_wait); } -static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, - unsigned int data_size) +static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; int rv; - struct p_barrier *p = mdev->tconn->data.rbuf; + struct p_barrier *p = tconn->data.rbuf; struct drbd_epoch *epoch; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + inc_unacked(mdev); mdev->current_epoch->barrier_nr = p->barrier; @@ -1540,12 +1544,17 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, return NULL; } -static int receive_DataReply(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; struct drbd_request *req; sector_t sector; int err; - struct p_data *p = mdev->tconn->data.rbuf; + struct p_data *p = tconn->data.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; sector = be64_to_cpu(p->sector); @@ -1568,11 +1577,16 @@ static int receive_DataReply(struct drbd_conf *mdev, struct packet_info *pi) return err; } -static int receive_RSDataReply(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; int err; - struct p_data *p = mdev->tconn->data.rbuf; + struct p_data *p = tconn->data.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; sector = 
be64_to_cpu(p->sector); D_ASSERT(p->block_id == ID_SYNCER); @@ -1956,16 +1970,21 @@ static int handle_write_conflicts(struct drbd_conf *mdev, } /* mirrored write */ -static int receive_Data(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; struct drbd_peer_request *peer_req; - struct p_data *p = mdev->tconn->data.rbuf; + struct p_data *p = tconn->data.rbuf; u32 peer_seq = be32_to_cpu(p->seq_num); int rw = WRITE; u32 dp_flags; int err; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + if (!get_ldev(mdev)) { int err2; @@ -2135,15 +2154,21 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) } -static int receive_DataRequest(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; - const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + sector_t capacity; struct drbd_peer_request *peer_req; struct digest_info *di = NULL; int size, verb; unsigned int fault_type; - struct p_block_req *p = mdev->tconn->data.rbuf; + struct p_block_req *p = tconn->data.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + capacity = drbd_get_capacity(mdev->this_bdev); sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -2957,17 +2982,60 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, struct packet_info *pi) +static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_rs_param_95 *p = mdev->tconn->data.rbuf; + void *buffer = tconn->data.rbuf; + int size = pi->size; + + while (size) { + int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE); + s = drbd_recv(tconn, buffer, s); + if (s <= 0) { + if (s < 0) + return s; + break; + } + size -= s; + 
} + if (size) + return -EIO; + return 0; +} + +/* + * config_unknown_volume - device configuration command for unknown volume + * + * When a device is added to an existing connection, the node on which the + * device is added first will send configuration commands to its peer but the + * peer will not know about the device yet. It will warn and ignore these + * commands. Once the device is added on the second node, the second node will + * send the same device configuration commands, but in the other direction. + * + * (We can also end up here if drbd is misconfigured.) + */ +static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi) +{ + conn_warn(tconn, "Volume %u unknown; ignoring %s packet\n", + pi->vnr, cmdname(pi->cmd)); + return ignore_remaining_packet(tconn, pi); +} + +static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) +{ + struct drbd_conf *mdev; + struct p_rs_param_95 *p = tconn->data.rbuf; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; - const int apv = mdev->tconn->agreed_pro_version; + const int apv = tconn->agreed_pro_version; int *rs_plan_s = NULL; int fifo_size = 0; int err; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, pi); + exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? 
sizeof(struct p_rs_param) + SHARED_SECRET_MAX @@ -3128,14 +3196,19 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_sizes *p = mdev->tconn->data.rbuf; + struct drbd_conf *mdev; + struct p_sizes *p = tconn->data.rbuf; enum determine_dev_size dd = unchanged; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, pi); + p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); @@ -3225,12 +3298,17 @@ static int receive_sizes(struct drbd_conf *mdev, struct packet_info *pi) return 0; } -static int receive_uuids(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_uuids *p = mdev->tconn->data.rbuf; + struct drbd_conf *mdev; + struct p_uuids *p = tconn->data.rbuf; u64 *p_uuid; int i, updated_uuids = 0; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, pi); + p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) @@ -3320,12 +3398,17 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_req_state *p = mdev->tconn->data.rbuf; + struct drbd_conf *mdev; + struct p_req_state *p = tconn->data.rbuf; union drbd_state mask, val; enum drbd_state_rv rv; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); @@ -3370,14 +3453,19 @@ static int receive_req_conn_state(struct drbd_tconn 
*tconn, struct packet_info * return 0; } -static int receive_state(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_state *p = mdev->tconn->data.rbuf; + struct drbd_conf *mdev; + struct p_state *p = tconn->data.rbuf; union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags cs_flags; int rv; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, pi); + peer_state.i = be32_to_cpu(p->state); real_peer_disk = peer_state.disk; @@ -3522,9 +3610,14 @@ static int receive_state(struct drbd_conf *mdev, struct packet_info *pi) return 0; } -static int receive_sync_uuid(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_rs_uuid *p = mdev->tconn->data.rbuf; + struct drbd_conf *mdev; + struct p_rs_uuid *p = tconn->data.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; wait_event(mdev->misc_wait, mdev->state.conn == C_WF_SYNC_UUID || @@ -3731,11 +3824,16 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. 
*/ -static int receive_bitmap(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; struct bm_xfer_ctx c; int err; - struct p_header *h = mdev->tconn->data.rbuf; + struct p_header *h = tconn->data.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information @@ -3815,51 +3913,31 @@ static int receive_bitmap(struct drbd_conf *mdev, struct packet_info *pi) return err; } -static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size) +static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi) { - /* TODO zero copy sink :) */ - static char sink[128]; - int size, want, r; - - size = data_size; - while (size > 0) { - want = min_t(int, size, sizeof(sink)); - r = drbd_recv(tconn, sink, want); - if (r <= 0) - break; - size -= r; - } - return size ? 
-EIO : 0; -} - -static int receive_skip(struct drbd_conf *mdev, struct packet_info *pi) -{ - dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", + conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n", pi->cmd, pi->size); - return _tconn_receive_skip(mdev->tconn, pi->size); + return ignore_remaining_packet(tconn, pi); } -static int tconn_receive_skip(struct drbd_tconn *tconn, struct packet_info *pi) -{ - conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n", - pi->cmd, pi->size); - - return _tconn_receive_skip(tconn, pi->size); -} - -static int receive_UnplugRemote(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi) { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ - drbd_tcp_quickack(mdev->tconn->data.socket); + drbd_tcp_quickack(tconn->data.socket); return 0; } -static int receive_out_of_sync(struct drbd_conf *mdev, struct packet_info *pi) +static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_desc *p = mdev->tconn->data.rbuf; + struct drbd_conf *mdev; + struct p_block_desc *p = tconn->data.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; switch (mdev->state.conn) { case C_WF_SYNC_UUID: @@ -3879,37 +3957,33 @@ static int receive_out_of_sync(struct drbd_conf *mdev, struct packet_info *pi) struct data_cmd { int expect_payload; size_t pkt_size; - enum mdev_or_conn fa_type; /* first argument's type */ - union { - int (*mdev_fn)(struct drbd_conf *, struct packet_info *); - int (*conn_fn)(struct drbd_tconn *, struct packet_info *); - }; + int (*fn)(struct drbd_tconn *, struct packet_info *); }; static struct data_cmd drbd_cmd_handler[] = { - [P_DATA] = { 1, sizeof(struct p_data), MDEV, { receive_Data } }, - [P_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } }, - [P_RS_DATA_REPLY] = { 1, 
sizeof(struct p_data), MDEV, { receive_RSDataReply } } , - [P_BARRIER] = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } , - [P_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } , - [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } , - [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } }, - [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, - [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, - [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } }, - [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } }, - [P_PROTOCOL] = { 1, sizeof(struct p_protocol), CONN, { .conn_fn = receive_protocol } }, - [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } }, - [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } }, - [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } }, - [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } }, - [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } }, - [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, - [P_OV_REPLY] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, - [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } }, - [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } }, - [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } }, - [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } }, + [P_DATA] = { 1, sizeof(struct p_data), receive_Data }, + [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, + [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , + [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , + [P_BITMAP] = { 
1, sizeof(struct p_header), receive_bitmap } , + [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , + [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote }, + [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam }, + [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam }, + [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, + [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, + [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, + [P_STATE] = { 0, sizeof(struct p_state), receive_state }, + [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, + [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid }, + [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, + [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, + [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, + [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, + [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, }; static void drbdd(struct drbd_tconn *tconn) @@ -3927,7 +4001,7 @@ static void drbdd(struct drbd_tconn *tconn) goto err_out; cmd = &drbd_cmd_handler[pi.cmd]; - if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->mdev_fn)) { + if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) { conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } @@ -3945,16 +4019,8 @@ static void drbdd(struct drbd_tconn *tconn) pi.size -= shs; } - if (cmd->fa_type == CONN) - err = cmd->conn_fn(tconn, &pi); - else { - struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); - err = mdev ? 
- cmd->mdev_fn(mdev, &pi) : - tconn_receive_skip(tconn, &pi); - } - - if (unlikely(err)) { + err = cmd->fn(tconn, &pi); + if (err) { conn_err(tconn, "error receiving %s, l: %d!\n", cmdname(pi.cmd), pi.size); goto err_out; From bd0c824a9db54bfbf5a26a5bb9b604d65c25b904 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Mar 2011 12:02:20 +0100 Subject: [PATCH 271/609] drbd: Use the idr_for_each_entry() iterator instead of idr_for_each() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 148 ++++++++++++++++---------------- 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b4f668db329..85cfbeadd51 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1378,9 +1378,6 @@ static void print_conn_state_change(struct drbd_tconn *tconn, enum drbd_conns oc } struct _is_valid_itr_params { - enum chg_state_flags flags; - union drbd_state mask, val; - union drbd_state ms; /* maximal state, over all mdevs */ enum drbd_conns oc; enum { OC_UNINITIALIZED, @@ -1389,69 +1386,86 @@ struct _is_valid_itr_params { } oc_state; }; -static int _is_valid_itr_fn(int vnr, void *p, void *data) +static enum drbd_state_rv +conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags, struct _is_valid_itr_params *params) { - struct drbd_conf *mdev = (struct drbd_conf *)p; - struct _is_valid_itr_params *params = (struct _is_valid_itr_params *)data; - enum chg_state_flags flags = params->flags; + enum drbd_state_rv rv = SS_SUCCESS; union drbd_state ns, os; - enum drbd_state_rv rv; + struct drbd_conf *mdev; + int vnr; - os = mdev->state; - ns = apply_mask_val(os, params->mask, params->val); - ns = sanitize_state(mdev, ns, NULL); - rv = is_valid_state(mdev, ns); + params->oc_state = OC_UNINITIALIZED; + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + os = 
mdev->state; + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); - if (rv < SS_SUCCESS) { - /* If the old state was illegal as well, then let this happen...*/ + switch (params->oc_state) { + case OC_UNINITIALIZED: + params->oc = os.conn; + params->oc_state = OC_CONSISTENT; + break; + case OC_CONSISTENT: + if (params->oc != os.conn) + params->oc_state = OC_INCONSISTENT; + break; + case OC_INCONSISTENT: + break; + } - if (is_valid_state(mdev, os) == rv) - rv = is_valid_soft_transition(os, ns); - } else - rv = is_valid_soft_transition(os, ns); + if (ns.i == os.i) + continue; - switch (params->oc_state) { - case OC_UNINITIALIZED: - params->oc = os.conn; - params->oc_state = OC_CONSISTENT; - break; - case OC_CONSISTENT: - if (params->oc != os.conn) - params->oc_state = OC_INCONSISTENT; - break; - case OC_INCONSISTENT: - break; + rv = is_valid_transition(os, ns); + if (rv < SS_SUCCESS) + break; + + if (!(flags & CS_HARD)) { + rv = is_valid_state(mdev, ns); + if (rv < SS_SUCCESS) { + if (is_valid_state(mdev, os) == rv) + rv = is_valid_soft_transition(os, ns); + } else + rv = is_valid_soft_transition(os, ns); + } + if (rv < SS_SUCCESS) + break; } - if (rv < SS_SUCCESS) { - if (flags & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - return rv; - } else - return 0; + if (rv < SS_SUCCESS && flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + + return rv; } -static int _set_state_itr_fn(int vnr, void *p, void *data) +static union drbd_state +conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags) { - struct drbd_conf *mdev = (struct drbd_conf *)p; - struct _is_valid_itr_params *params = (struct _is_valid_itr_params *)data; - enum chg_state_flags flags = params->flags; - union drbd_state os, ns, ms = params->ms; + union drbd_state ns, os, ms = { }; + struct drbd_conf *mdev; enum drbd_state_rv rv; + int vnr; - os = mdev->state; - ns = apply_mask_val(os, params->mask, params->val); - ns = 
sanitize_state(mdev, ns, NULL); + if (mask.conn == C_MASK) + tconn->cstate = val.conn; - rv = __drbd_set_state(mdev, ns, flags, NULL); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + os = mdev->state; + ns = apply_mask_val(os, mask, val); + ns = sanitize_state(mdev, ns, NULL); - ms.role = max_role(ns.role, ms.role); - ms.peer = max_role(ns.peer, ms.peer); - ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk); - ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk); - params->ms = ms; + rv = __drbd_set_state(mdev, ns, flags, NULL); + if (rv < SS_SUCCESS) + BUG(); - return 0; + ms.role = max_role(mdev->state.role, ms.role); + ms.peer = max_role(mdev->state.peer, ms.peer); + ms.disk = max_t(enum drbd_disk_state, mdev->state.disk, ms.disk); + ms.pdsk = max_t(enum drbd_disk_state, mdev->state.pdsk, ms.pdsk); + } + + return ms; } static enum drbd_state_rv @@ -1466,18 +1480,14 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) return SS_CW_FAILED_BY_PEER; - params.flags = CS_NO_CSTATE_CHG; /* öö think */ - params.mask = mask; - params.val = val; - spin_lock_irq(&tconn->req_lock); rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; if (rv == SS_UNKNOWN_ERROR) - rv = idr_for_each(&tconn->volumes, _is_valid_itr_fn, &params); + rv = conn_is_valid_transition(tconn, mask, val, CS_NO_CSTATE_CHG, &params); - if (rv == 0) /* idr_for_each semantics */ - rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail.
*/ spin_unlock_irq(&tconn->req_lock); @@ -1517,22 +1527,13 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ struct _is_valid_itr_params params; struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; + union drbd_state ms; rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) goto abort; - params.flags = flags; - params.mask = mask; - params.val = val; - params.oc_state = OC_UNINITIALIZED; - - if (!(flags & CS_HARD)) - rv = idr_for_each(&tconn->volumes, _is_valid_itr_fn, &params); - - if (rv == 0) /* idr_for_each semantics */ - rv = SS_SUCCESS; - + rv = conn_is_valid_transition(tconn, mask, val, flags, &params); if (rv < SS_SUCCESS) goto abort; @@ -1546,17 +1547,16 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ if (params.oc_state == OC_CONSISTENT) { oc = params.oc; print_conn_state_change(tconn, oc, val.conn); - params.flags |= CS_NO_CSTATE_CHG; + flags |= CS_NO_CSTATE_CHG; } - tconn->cstate = val.conn; - params.ms.i = 0; - params.ms.conn = val.conn; - idr_for_each(&tconn->volumes, _set_state_itr_fn, &params); + + ms = conn_set_state(tconn, mask, val, flags); + ms.conn = val.conn; acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); if (acscw) { acscw->oc = oc; - acscw->nms = params.ms; + acscw->nms = ms; acscw->flags = flags; acscw->w.cb = w_after_conn_state_ch; acscw->w.tconn = tconn; From 88ef594ed7367a76e53f4a33b3d866f4dfb1d4ff Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Mar 2011 14:31:11 +0100 Subject: [PATCH 272/609] drbd: Fixed logging of old connection state During a disconnect the oc variable in _conn_request_state() could become outdated. Determine the common old state after sleeping.
While at it, I implemented that for all parts of the state Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 117 ++++++++++++++++++++++++-------- 1 file changed, 87 insertions(+), 30 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 85cfbeadd51..2a170bf7cc2 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1377,42 +1377,100 @@ static void print_conn_state_change(struct drbd_tconn *tconn, enum drbd_conns oc conn_info(tconn, "%s\n", pb); } -struct _is_valid_itr_params { - enum drbd_conns oc; - enum { - OC_UNINITIALIZED, - OC_CONSISTENT, - OC_INCONSISTENT, - } oc_state; -}; +enum sp_state { + OC_UNINITIALIZED, + OC_CONSISTENT, + OC_INCONSISTENT, +} oc_state; + +static void common_state_part(enum sp_state *sps, int *sp, int nsp) +{ + switch (*sps) { + case OC_UNINITIALIZED: + *sp = nsp; + *sps = OC_CONSISTENT; + break; + case OC_CONSISTENT: + if (*sp != nsp) + *sps = OC_INCONSISTENT; + break; + case OC_INCONSISTENT: + break; + } +} + +void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, union drbd_state *pmask) +{ + union drbd_state css = {}; /* common state state */ + union drbd_state os, cs = {}; /* old_state, common_state */ + union drbd_state mask = {}; + enum sp_state sps; /* state part state */ + int sp; /* state part */ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + os = mdev->state; + + sps = css.role; + sp = cs.role; + common_state_part(&sps, &sp, os.role); + css.role = sps; + cs.role = sp; + + sps = css.peer; + sp = cs.peer; + common_state_part(&sps, &sp, os.peer); + css.peer = sps; + cs.peer = sp; + + sps = css.conn; + sp = cs.conn; + common_state_part(&sps, &sp, os.conn); + css.conn = sps; + cs.conn = sp; + + sps = css.disk; + sp = cs.disk; + common_state_part(&sps, &sp, os.disk); + css.disk = sps; + cs.disk = sp; + + sps = css.pdsk; + sp = cs.pdsk; + 
common_state_part(&sps, &sp, os.pdsk); + css.pdsk = sps; + cs.pdsk = sp; + } + + if (css.role == OC_CONSISTENT) + mask.role = R_MASK; + if (css.peer == OC_CONSISTENT) + mask.peer = R_MASK; + if (css.conn == OC_CONSISTENT) + mask.conn = C_MASK; + if (css.disk == OC_CONSISTENT) + mask.disk = D_MASK; + if (css.pdsk == OC_CONSISTENT) + mask.pdsk = D_MASK; + + *pcs = cs; + *pmask = mask; +} static enum drbd_state_rv conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, - enum chg_state_flags flags, struct _is_valid_itr_params *params) + enum chg_state_flags flags) { enum drbd_state_rv rv = SS_SUCCESS; union drbd_state ns, os; struct drbd_conf *mdev; int vnr; - params->oc_state = OC_UNINITIALIZED; idr_for_each_entry(&tconn->volumes, mdev, vnr) { os = mdev->state; ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); - switch (params->oc_state) { - case OC_UNINITIALIZED: - params->oc = os.conn; - params->oc_state = OC_CONSISTENT; - break; - case OC_CONSISTENT: - if (params->oc != os.conn) - params->oc_state = OC_INCONSISTENT; - break; - case OC_INCONSISTENT: - break; - } - if (ns.i == os.i) continue; @@ -1471,7 +1529,6 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state static enum drbd_state_rv _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) { - struct _is_valid_itr_params params; enum drbd_state_rv rv; if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags)) @@ -1484,7 +1541,7 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; if (rv == SS_UNKNOWN_ERROR) - rv = conn_is_valid_transition(tconn, mask, val, CS_NO_CSTATE_CHG, &params); + rv = conn_is_valid_transition(tconn, mask, val, CS_NO_CSTATE_CHG); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail.
*/ @@ -1524,16 +1581,15 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ enum chg_state_flags flags) { enum drbd_state_rv rv = SS_SUCCESS; - struct _is_valid_itr_params params; struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; - union drbd_state ms; + union drbd_state ms, os_val, os_mask; rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) goto abort; - rv = conn_is_valid_transition(tconn, mask, val, flags, &params); + rv = conn_is_valid_transition(tconn, mask, val, flags); if (rv < SS_SUCCESS) goto abort; @@ -1544,8 +1600,9 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ goto abort; } - if (params.oc_state == OC_CONSISTENT) { - oc = params.oc; + conn_old_common_state(tconn, &os_val, &os_mask); + if (os_mask.conn == C_MASK) { + oc = os_val.conn; print_conn_state_change(tconn, oc, val.conn); flags |= CS_NO_CSTATE_CHG; } From 435693e89b7a918b14ea67968971a2d8d9e88df2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Mar 2011 15:11:30 +0100 Subject: [PATCH 273/609] drbd: Print common state changes of all volumes as connection state changes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 151 ++++++++++++-------------------- drivers/block/drbd/drbd_state.h | 19 ++-- 2 files changed, 66 insertions(+), 104 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2a170bf7cc2..164a7f821c7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -350,29 +350,29 @@ void print_st_err(struct drbd_conf *mdev, union drbd_state os, print_st(mdev, "wanted", ns); } -static void print_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, +static long print_state_change(char *pb, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { - char *pbp, pb[300]; + char *pbp; pbp = pb; *pbp = 0; - if (ns.role !=
os.role) + if (ns.role != os.role && flags & CS_DC_ROLE) pbp += sprintf(pbp, "role( %s -> %s ) ", drbd_role_str(os.role), drbd_role_str(ns.role)); - if (ns.peer != os.peer) + if (ns.peer != os.peer && flags & CS_DC_PEER) pbp += sprintf(pbp, "peer( %s -> %s ) ", drbd_role_str(os.peer), drbd_role_str(ns.peer)); - if (ns.conn != os.conn && !(flags & CS_NO_CSTATE_CHG)) + if (ns.conn != os.conn && flags & CS_DC_CONN) pbp += sprintf(pbp, "conn( %s -> %s ) ", drbd_conn_str(os.conn), drbd_conn_str(ns.conn)); - if (ns.disk != os.disk) + if (ns.disk != os.disk && flags & CS_DC_DISK) pbp += sprintf(pbp, "disk( %s -> %s ) ", drbd_disk_str(os.disk), drbd_disk_str(ns.disk)); - if (ns.pdsk != os.pdsk) + if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK) pbp += sprintf(pbp, "pdsk( %s -> %s ) ", drbd_disk_str(os.pdsk), drbd_disk_str(ns.pdsk)); @@ -392,10 +392,29 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio pbp += sprintf(pbp, "user_isp( %d -> %d ) ", os.user_isp, ns.user_isp); - if (pbp != pb) + + return pbp - pb; +} + +static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char pb[300]; + + if (print_state_change(pb, os, ns, flags ^ CS_DC_MASK)) dev_info(DEV, "%s\n", pb); } +static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char pb[300]; + + if (print_state_change(pb, os, ns, flags)) + conn_info(tconn, "%s\n", pb); +} + + /** * is_valid_state() - Returns an SS_ error code if ns is not valid * @mdev: DRBD device. @@ -827,7 +846,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (warn_sync_abort) dev_warn(DEV, "%s aborted.\n", warn_sync_abort); - print_state_change(mdev, os, ns, flags); + drbd_pr_state_change(mdev, os, ns, flags); /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference * on the ldev here, to be sure the transition -> D_DISKLESS resp. 
@@ -1364,98 +1383,41 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) return 0; } -static void print_conn_state_change(struct drbd_tconn *tconn, enum drbd_conns oc, enum drbd_conns nc) +void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf) { - char *pbp, pb[300]; - pbp = pb; - *pbp = 0; - if (nc != oc) - pbp += sprintf(pbp, "conn( %s -> %s ) ", - drbd_conn_str(oc), - drbd_conn_str(nc)); - - conn_info(tconn, "%s\n", pb); -} - -enum sp_state { - OC_UNINITIALIZED, - OC_CONSISTENT, - OC_INCONSISTENT, -} oc_state; - -static void common_state_part(enum sp_state *sps, int *sp, int nsp) -{ - switch (*sps) { - case OC_UNINITIALIZED: - *sp = nsp; - *sps = OC_CONSISTENT; - break; - case OC_CONSISTENT: - if (*sp != nsp) - *sps = OC_INCONSISTENT; - break; - case OC_INCONSISTENT: - break; - } -} - -void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, union drbd_state *pmask) -{ - union drbd_state css = {}; /* common state state */ + enum chg_state_flags flags = ~0; union drbd_state os, cs = {}; /* old_state, common_state */ - union drbd_state mask = {}; - enum sp_state sps; /* state part state */ - int sp; /* state part */ struct drbd_conf *mdev; - int vnr; + int vnr, first_vol = 1; idr_for_each_entry(&tconn->volumes, mdev, vnr) { os = mdev->state; - sps = css.role; - sp = cs.role; - common_state_part(&sps, &sp, os.role); - css.role = sps; - cs.role = sp; + if (first_vol) { + cs = os; + first_vol = 0; + continue; + } - sps = css.peer; - sp = cs.peer; - common_state_part(&sps, &sp, os.peer); - css.peer = sps; - cs.peer = sp; + if (cs.role != os.role) + flags &= ~CS_DC_ROLE; - sps = css.conn; - sp = cs.conn; - common_state_part(&sps, &sp, os.conn); - css.conn = sps; - cs.conn = sp; + if (cs.peer != os.peer) + flags &= ~CS_DC_PEER; - sps = css.disk; - sp = cs.disk; - common_state_part(&sps, &sp, os.disk); - css.disk = sps; - cs.disk = sp; + if (cs.conn != os.conn) + flags &= ~CS_DC_CONN; - 
sps = css.pdsk; - sp = cs.pdsk; - common_state_part(&sps, &sp, os.pdsk); - css.pdsk = sps; - cs.pdsk = sp; + if (cs.disk != os.disk) + flags &= ~CS_DC_DISK; + + if (cs.pdsk != os.pdsk) + flags &= ~CS_DC_PDSK; } - if (css.role == OC_CONSISTENT) - mask.role = R_MASK; - if (css.peer == OC_CONSISTENT) - mask.peer = R_MASK; - if (css.conn == OC_CONSISTENT) - mask.conn = C_MASK; - if (css.disk == OC_CONSISTENT) - mask.disk = D_MASK; - if (css.pdsk == OC_CONSISTENT) - mask.pdsk = D_MASK; - + *pf |= CS_DC_MASK; + *pf &= flags; *pcs = cs; - *pmask = mask; } static enum drbd_state_rv @@ -1541,7 +1503,7 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; if (rv == SS_UNKNOWN_ERROR) - rv = conn_is_valid_transition(tconn, mask, val, CS_NO_CSTATE_CHG); + rv = conn_is_valid_transition(tconn, mask, val, 0); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ @@ -1583,7 +1545,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ enum drbd_state_rv rv = SS_SUCCESS; struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; - union drbd_state ms, os_val, os_mask; + union drbd_state ms, os; rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) @@ -1600,19 +1562,14 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ goto abort; } - conn_old_common_state(tconn, &os_val, &os_mask); - if (os_mask.conn == C_MASK) { - oc = os_val.conn; - print_conn_state_change(tconn, oc, val.conn); - flags |= CS_NO_CSTATE_CHG; - } - + conn_old_common_state(tconn, &os, &flags); ms = conn_set_state(tconn, mask, val, flags); ms.conn = val.conn; + conn_pr_state_change(tconn, os, ms, flags); acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); if (acscw) { - acscw->oc = oc; + acscw->oc = os.conn; acscw->nms = ms; acscw->flags = flags; acscw->w.cb = w_after_conn_state_ch; diff --git 
a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 394a1998acd..11fd0f897bd 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -57,13 +57,18 @@ struct drbd_tconn; __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) enum chg_state_flags { - CS_HARD = 1, - CS_VERBOSE = 2, - CS_WAIT_COMPLETE = 4, - CS_SERIALIZE = 8, - CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, - CS_NO_CSTATE_CHG = 16, /* Do not display changes in cstate. Internal to drbd_state.c */ - CS_LOCAL_ONLY = 32, /* Do not consider a device pair wide state change */ + CS_HARD = 1 << 0, + CS_VERBOSE = 1 << 1, + CS_WAIT_COMPLETE = 1 << 2, + CS_SERIALIZE = 1 << 3, + CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, + CS_LOCAL_ONLY = 1 << 4, /* Do not consider a device pair wide state change */ + CS_DC_ROLE = 1 << 5, /* DC = display as connection state change */ + CS_DC_PEER = 1 << 6, + CS_DC_CONN = 1 << 7, + CS_DC_DISK = 1 << 8, + CS_DC_PDSK = 1 << 9, + CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK, }; extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, From 778bcf2e290fc9f13735c32640cdafb34794ebd1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 28 Mar 2011 12:55:03 +0200 Subject: [PATCH 274/609] drbd: Allow to disconnect if one volume is diskless Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_state.c | 6 ++++++ drivers/block/drbd/drbd_state.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3243c784991..b9bcb8bf7ec 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3447,7 +3447,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info * mask = convert_state(mask); val = convert_state(val); - rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY); + rv = 
conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL); conn_send_sr_reply(tconn, rv); return 0; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 164a7f821c7..ca77da38a0d 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1433,6 +1433,9 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union os = mdev->state; ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); + if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) + ns.disk = os.disk; + if (ns.i == os.i) continue; @@ -1475,6 +1478,9 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state ns = apply_mask_val(os, mask, val); ns = sanitize_state(mdev, ns, NULL); + if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) + ns.disk = os.disk; + rv = __drbd_set_state(mdev, ns, flags, NULL); if (rv < SS_SUCCESS) BUG(); diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 11fd0f897bd..c0331f18371 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -69,6 +69,7 @@ enum chg_state_flags { CS_DC_DISK = 1 << 8, CS_DC_PDSK = 1 << 9, CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK, + CS_IGN_OUTD_FAIL = 1 << 10, }; extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, From 438c8374ae3e87f44d945a2ac2901e3b14aec1a8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 28 Mar 2011 14:48:01 +0200 Subject: [PATCH 275/609] drbd: Do not segfault if a sync dependency reaches a diskless device Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index cbbc5c7cbef..c3d1f5f5853 100644 --- a/drivers/block/drbd/drbd_worker.c +++ 
b/drivers/block/drbd/drbd_worker.c @@ -1319,6 +1319,8 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) struct drbd_conf *odev = mdev; while (1) { + if (!odev->ldev) + return 1; if (odev->ldev->dc.resync_after == -1) return 1; odev = minor_to_mdev(odev->ldev->dc.resync_after); From e05e1e59db07b2a7f493c0219230a7be13f87f9d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Mar 2011 15:16:26 +0100 Subject: [PATCH 276/609] drbd: Pass struct packet_info down to the asender receive functions Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b9bcb8bf7ec..232ca28cb99 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4459,7 +4459,7 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd) +static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_req_state_reply *p = tconn->meta.rbuf; int retcode = be32_to_cpu(p->retcode); @@ -4476,7 +4476,7 @@ static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd) return true; } -static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_RqSReply(struct drbd_conf *mdev, struct packet_info *pi) { struct p_req_state_reply *p = mdev->tconn->meta.rbuf; int retcode = be32_to_cpu(p->retcode); @@ -4493,13 +4493,13 @@ static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -static int got_Ping(struct drbd_tconn *tconn, enum drbd_packet cmd) +static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi) { return drbd_send_ping_ack(tconn); } -static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd) +static int got_PingAck(struct 
drbd_tconn *tconn, struct packet_info *pi) { /* restore idle timeout */ tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; @@ -4509,7 +4509,7 @@ static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd) return true; } -static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_IsInSync(struct drbd_conf *mdev, struct packet_info *pi) { struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); @@ -4554,7 +4554,7 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, return true; } -static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_BlockAck(struct drbd_conf *mdev, struct packet_info *pi) { struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); @@ -4568,7 +4568,7 @@ static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) dec_rs_pending(mdev); return true; } - switch (cmd) { + switch (pi->cmd) { case P_RS_WRITE_ACK: D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER_AND_SIS; @@ -4599,7 +4599,7 @@ static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) what, false); } -static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_NegAck(struct drbd_conf *mdev, struct packet_info *pi) { struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); @@ -4632,7 +4632,7 @@ static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_NegDReply(struct drbd_conf *mdev, struct packet_info *pi) { struct p_block_ack *p = mdev->tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); @@ -4647,7 +4647,7 @@ static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd) NEG_ACKED, false); } -static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd) 
+static int got_NegRSDReply(struct drbd_conf *mdev, struct packet_info *pi) { sector_t sector; int size; @@ -4662,7 +4662,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd) if (get_ldev_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, sector); - switch (cmd) { + switch (pi->cmd) { case P_NEG_RS_DREPLY: drbd_rs_failed_io(mdev, sector, size); case P_RS_CANCEL: @@ -4678,7 +4678,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_BarrierAck(struct drbd_conf *mdev, struct packet_info *pi) { struct p_barrier_ack *p = mdev->tconn->meta.rbuf; @@ -4694,7 +4694,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_OVResult(struct drbd_conf *mdev, struct packet_info *pi) { struct p_block_ack *p = mdev->tconn->meta.rbuf; struct drbd_work *w; @@ -4739,7 +4739,7 @@ static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd) +static int got_skip(struct drbd_conf *mdev, struct packet_info *pi) { return true; } @@ -4774,8 +4774,8 @@ struct asender_cmd { size_t pkt_size; enum mdev_or_conn fa_type; /* first argument's type */ union { - int (*mdev_fn)(struct drbd_conf *mdev, enum drbd_packet cmd); - int (*conn_fn)(struct drbd_tconn *tconn, enum drbd_packet cmd); + int (*mdev_fn)(struct drbd_conf *mdev, struct packet_info *); + int (*conn_fn)(struct drbd_tconn *tconn, struct packet_info *); }; }; @@ -4902,10 +4902,10 @@ int drbd_asender(struct drbd_thread *thi) bool rv; if (cmd->fa_type == CONN) { - rv = cmd->conn_fn(tconn, pi.cmd); + rv = cmd->conn_fn(tconn, &pi); } else { struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); - rv = cmd->mdev_fn(mdev, pi.cmd); + rv = cmd->mdev_fn(mdev, &pi); } if (!rv) From 
1952e9166a335b552ecb5964c1fb4609bd999de8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Mar 2011 15:37:43 +0100 Subject: [PATCH 277/609] drbd: Map from (connection, volume number) to device in the asender handlers Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 137 +++++++++++++++++------------ 1 file changed, 82 insertions(+), 55 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 232ca28cb99..b7cb7590e29 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -60,11 +60,6 @@ enum finish_epoch { FE_RECYCLED, }; -enum mdev_or_conn { - MDEV, - CONN, -}; - static int drbd_do_handshake(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(int vnr, void *p, void *data); @@ -4476,11 +4471,16 @@ static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) return true; } -static int got_RqSReply(struct drbd_conf *mdev, struct packet_info *pi) +static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_req_state_reply *p = mdev->tconn->meta.rbuf; + struct drbd_conf *mdev; + struct p_req_state_reply *p = tconn->meta.rbuf; int retcode = be32_to_cpu(p->retcode); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return false; + if (retcode >= SS_SUCCESS) { set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); } else { @@ -4509,12 +4509,17 @@ static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi) return true; } -static int got_IsInSync(struct drbd_conf *mdev, struct packet_info *pi) +static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = mdev->tconn->meta.rbuf; + struct drbd_conf *mdev; + struct p_block_ack *p = tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return 
false; + D_ASSERT(mdev->tconn->agreed_pro_version >= 89); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4554,13 +4559,18 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, return true; } -static int got_BlockAck(struct drbd_conf *mdev, struct packet_info *pi) +static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = mdev->tconn->meta.rbuf; + struct drbd_conf *mdev; + struct p_block_ack *p = tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); enum drbd_req_event what; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return false; + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { @@ -4599,15 +4609,20 @@ static int got_BlockAck(struct drbd_conf *mdev, struct packet_info *pi) what, false); } -static int got_NegAck(struct drbd_conf *mdev, struct packet_info *pi) +static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = mdev->tconn->meta.rbuf; + struct drbd_conf *mdev; + struct p_block_ack *p = tconn->meta.rbuf; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); - bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || - mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B; + bool missing_ok = tconn->net_conf->wire_protocol == DRBD_PROT_A || + tconn->net_conf->wire_protocol == DRBD_PROT_B; bool found; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return false; + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { @@ -4632,11 +4647,16 @@ static int got_NegAck(struct drbd_conf *mdev, struct packet_info *pi) return true; } -static int got_NegDReply(struct drbd_conf *mdev, struct packet_info *pi) +static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = mdev->tconn->meta.rbuf; + struct drbd_conf *mdev; + struct p_block_ack *p = tconn->meta.rbuf; 
sector_t sector = be64_to_cpu(p->sector); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return false; + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", @@ -4647,11 +4667,16 @@ static int got_NegDReply(struct drbd_conf *mdev, struct packet_info *pi) NEG_ACKED, false); } -static int got_NegRSDReply(struct drbd_conf *mdev, struct packet_info *pi) +static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; int size; - struct p_block_ack *p = mdev->tconn->meta.rbuf; + struct p_block_ack *p = tconn->meta.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return false; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4678,9 +4703,14 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct packet_info *pi) return true; } -static int got_BarrierAck(struct drbd_conf *mdev, struct packet_info *pi) +static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_barrier_ack *p = mdev->tconn->meta.rbuf; + struct drbd_conf *mdev; + struct p_barrier_ack *p = tconn->meta.rbuf; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return false; tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size)); @@ -4694,13 +4724,18 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct packet_info *pi) return true; } -static int got_OVResult(struct drbd_conf *mdev, struct packet_info *pi) +static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = mdev->tconn->meta.rbuf; + struct drbd_conf *mdev; + struct p_block_ack *p = tconn->meta.rbuf; struct drbd_work *w; sector_t sector; int size; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return false; + sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4739,7 +4774,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct packet_info *pi) return true; } -static 
int got_skip(struct drbd_conf *mdev, struct packet_info *pi) +static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi) { return true; } @@ -4772,31 +4807,27 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) struct asender_cmd { size_t pkt_size; - enum mdev_or_conn fa_type; /* first argument's type */ - union { - int (*mdev_fn)(struct drbd_conf *mdev, struct packet_info *); - int (*conn_fn)(struct drbd_tconn *tconn, struct packet_info *); - }; + int (*fn)(struct drbd_tconn *tconn, struct packet_info *); }; static struct asender_cmd asender_tbl[] = { - [P_PING] = { sizeof(struct p_header), CONN, { .conn_fn = got_Ping } }, - [P_PING_ACK] = { sizeof(struct p_header), CONN, { .conn_fn = got_PingAck } }, - [P_RECV_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, - [P_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, - [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, - [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, - [P_NEG_ACK] = { sizeof(struct p_block_ack), MDEV, { got_NegAck } }, - [P_NEG_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegDReply } }, - [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } }, - [P_OV_RESULT] = { sizeof(struct p_block_ack), MDEV, { got_OVResult } }, - [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), MDEV, { got_BarrierAck } }, - [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } }, - [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), MDEV, { got_skip } }, - [P_RS_CANCEL] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } }, - [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), CONN, {.conn_fn = got_conn_RqSReply}}, - [P_RETRY_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } }, + [P_PING] = { sizeof(struct p_header), got_Ping }, + [P_PING_ACK] = { sizeof(struct 
p_header), got_PingAck }, + [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, + [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, + [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, + [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, + [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, + [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, + [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply }, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply }, + [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, }; int drbd_asender(struct drbd_thread *thi) @@ -4886,7 +4917,7 @@ int drbd_asender(struct drbd_thread *thi) if (decode_header(tconn, h, &pi)) goto reconnect; cmd = &asender_tbl[pi.cmd]; - if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) { + if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) { conn_err(tconn, "unknown command %d on meta (l: %d)\n", pi.cmd, pi.size); goto disconnect; @@ -4901,15 +4932,11 @@ int drbd_asender(struct drbd_thread *thi) if (received == expect) { bool rv; - if (cmd->fa_type == CONN) { - rv = cmd->conn_fn(tconn, &pi); - } else { - struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr); - rv = cmd->mdev_fn(mdev, &pi); - } - - if (!rv) + rv = cmd->fn(tconn, &pi); + if (!rv) { + conn_err(tconn, "%pf failed\n", cmd->fn); goto reconnect; + } tconn->last_received = jiffies; From 2bf896213d4faa7289316663f5e8e0bc35d80abf Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 28 Mar 2011 16:33:12 +0200 Subject: [PATCH 
278/609] drbd: drbd_connect(): Initialize struct drbd_socket before sending anything Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 66 +++++++++++++++--------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b7cb7590e29..bfec09a2232 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -807,7 +807,7 @@ int drbd_connected(int vnr, void *p, void *data) */ static int drbd_connect(struct drbd_tconn *tconn) { - struct socket *s, *sock, *msock; + struct socket *sock, *msock; int try, h, ok; if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) @@ -818,10 +818,9 @@ static int drbd_connect(struct drbd_tconn *tconn) /* Assume that the peer only understands protocol 80 until we know better. */ tconn->agreed_pro_version = 80; - sock = NULL; - msock = NULL; - do { + struct socket *s; + for (try = 0;;) { /* 3 tries, this should take less than a second! 
*/ s = drbd_try_connect(tconn); @@ -832,24 +831,22 @@ static int drbd_connect(struct drbd_tconn *tconn) } if (s) { - if (!sock) { - drbd_send_fp(tconn, s, P_HAND_SHAKE_S); - sock = s; - s = NULL; - } else if (!msock) { - drbd_send_fp(tconn, s, P_HAND_SHAKE_M); - msock = s; - s = NULL; + if (!tconn->data.socket) { + tconn->data.socket = s; + drbd_send_fp(tconn, tconn->data.socket, P_HAND_SHAKE_S); + } else if (!tconn->meta.socket) { + tconn->meta.socket = s; + drbd_send_fp(tconn, tconn->meta.socket, P_HAND_SHAKE_M); } else { conn_err(tconn, "Logic error in drbd_connect()\n"); goto out_release_sockets; } } - if (sock && msock) { + if (tconn->data.socket && tconn->meta.socket) { schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10); - ok = drbd_socket_okay(&sock); - ok = drbd_socket_okay(&msock) && ok; + ok = drbd_socket_okay(&tconn->data.socket); + ok = drbd_socket_okay(&tconn->meta.socket) && ok; if (ok) break; } @@ -858,22 +855,22 @@ retry: s = drbd_wait_for_connect(tconn); if (s) { try = drbd_recv_fp(tconn, s); - drbd_socket_okay(&sock); - drbd_socket_okay(&msock); + drbd_socket_okay(&tconn->data.socket); + drbd_socket_okay(&tconn->meta.socket); switch (try) { case P_HAND_SHAKE_S: - if (sock) { + if (tconn->data.socket) { conn_warn(tconn, "initial packet S crossed\n"); - sock_release(sock); + sock_release(tconn->data.socket); } - sock = s; + tconn->data.socket = s; break; case P_HAND_SHAKE_M: - if (msock) { + if (tconn->meta.socket) { conn_warn(tconn, "initial packet M crossed\n"); - sock_release(msock); + sock_release(tconn->meta.socket); } - msock = s; + tconn->meta.socket = s; set_bit(DISCARD_CONCURRENT, &tconn->flags); break; default: @@ -893,14 +890,17 @@ retry: goto out_release_sockets; } - if (sock && msock) { - ok = drbd_socket_okay(&sock); - ok = drbd_socket_okay(&msock) && ok; + if (tconn->data.socket && &tconn->meta.socket) { + ok = drbd_socket_okay(&tconn->data.socket); + ok = drbd_socket_okay(&tconn->meta.socket) && ok; if (ok) break; } } 
while (1); + sock = tconn->data.socket; + msock = tconn->meta.socket; + msock->sk->sk_reuse = 1; /* SO_REUSEADDR */ sock->sk->sk_reuse = 1; /* SO_REUSEADDR */ @@ -926,8 +926,6 @@ retry: drbd_tcp_nodelay(sock); drbd_tcp_nodelay(msock); - tconn->data.socket = sock; - tconn->meta.socket = msock; tconn->last_received = jiffies; h = drbd_do_handshake(tconn); @@ -960,10 +958,14 @@ retry: return !idr_for_each(&tconn->volumes, drbd_connected, tconn); out_release_sockets: - if (sock) - sock_release(sock); - if (msock) - sock_release(msock); + if (tconn->data.socket) { + sock_release(tconn->data.socket); + tconn->data.socket = NULL; + } + if (tconn->meta.socket) { + sock_release(tconn->meta.socket); + tconn->meta.socket = NULL; + } return -1; } From 7c96715aa8ef1b5375c0d2a2d3bb1da99d95a39e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 22 Mar 2011 00:49:36 +0100 Subject: [PATCH 279/609] drbd: _conn_send_cmd(), _drbd_send_cmd(): Pass a struct drbd_socket instead of a plain socket Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 22 +++++++++++----------- drivers/block/drbd/drbd_receiver.c | 16 ++++++++-------- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9f1f0f56f71..a4ac5837054 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1113,7 +1113,7 @@ extern int _conn_send_state_req(struct drbd_tconn *, int vnr, enum drbd_packet c union drbd_state, union drbd_state); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); -extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, +extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags); extern int 
conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, @@ -1860,7 +1860,7 @@ static inline void request_ping(struct drbd_tconn *tconn) wake_asender(tconn); } -static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, +static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct drbd_socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 3e32836db55..4c94be25a77 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -719,14 +719,14 @@ static void prepare_header(struct drbd_conf *mdev, struct p_header *h, } /* the appropriate socket mutex must be held already */ -int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, +int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags) { int err; _prepare_header(tconn, vnr, h, cmd, size - sizeof(struct p_header)); - err = drbd_send_all(tconn, sock, h, size, msg_flags); + err = drbd_send_all(tconn, sock->socket, h, size, msg_flags); if (err && !signal_pending(current)) conn_warn(tconn, "short send %s size=%d\n", cmdname(cmd), (int)size); @@ -743,7 +743,7 @@ int conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, mutex_lock(&sock->mutex); if (sock->socket) - err = _conn_send_cmd(tconn, vnr, sock->socket, cmd, h, size, 0); + err = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); mutex_unlock(&sock->mutex); return err; } @@ -780,7 +780,7 @@ int drbd_send_ping_ack(struct drbd_tconn *tconn) int drbd_send_sync_param(struct drbd_conf *mdev) { struct p_rs_param_95 *p; - struct socket *sock; + struct drbd_socket *sock; int size, err; const int apv = mdev->tconn->agreed_pro_version; @@ -791,9 +791,9 @@ int drbd_send_sync_param(struct drbd_conf *mdev) : /* apv >= 95 */ sizeof(struct p_rs_param_95); 
mutex_lock(&mdev->tconn->data.mutex); - sock = mdev->tconn->data.socket; + sock = &mdev->tconn->data; - if (likely(sock != NULL)) { + if (likely(sock->socket != NULL)) { enum drbd_packet cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; @@ -979,16 +979,16 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl */ int drbd_send_state(struct drbd_conf *mdev) { - struct socket *sock; + struct drbd_socket *sock; struct p_state p; int err = -EIO; mutex_lock(&mdev->tconn->data.mutex); p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ - sock = mdev->tconn->data.socket; + sock = &mdev->tconn->data; - if (likely(sock != NULL)) + if (likely(sock->socket != NULL)) err = _drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); mutex_unlock(&mdev->tconn->data.mutex); @@ -1157,7 +1157,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) if (len) { dcbp_set_code(p, RLE_VLI_Bits); - err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, + err = _drbd_send_cmd(mdev, &mdev->tconn->data, P_COMPRESSED_BITMAP, &p->head, sizeof(*p) + len, 0); @@ -1175,7 +1175,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long *)h->payload); - err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, + err = _drbd_send_cmd(mdev, &mdev->tconn->data, P_BITMAP, h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bfec09a2232..40cecd6c7e3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -729,7 +729,7 @@ out: return s_estab; } -static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd) +static int drbd_send_fp(struct drbd_tconn *tconn, struct drbd_socket *sock, enum drbd_packet cmd) { struct p_header *h = 
tconn->data.sbuf; @@ -738,13 +738,13 @@ static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock) { - struct p_header80 *h = tconn->data.rbuf; + struct p_header80 h; int rr; - rr = drbd_recv_short(sock, h, sizeof(*h), 0); + rr = drbd_recv_short(sock, &h, sizeof(h), 0); - if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC)) - return be16_to_cpu(h->command); + if (rr == sizeof(h) && h.magic == cpu_to_be32(DRBD_MAGIC)) + return be16_to_cpu(h.command); return 0xffff; } @@ -833,10 +833,10 @@ static int drbd_connect(struct drbd_tconn *tconn) if (s) { if (!tconn->data.socket) { tconn->data.socket = s; - drbd_send_fp(tconn, tconn->data.socket, P_HAND_SHAKE_S); + drbd_send_fp(tconn, &tconn->data, P_HAND_SHAKE_S); } else if (!tconn->meta.socket) { tconn->meta.socket = s; - drbd_send_fp(tconn, tconn->meta.socket, P_HAND_SHAKE_M); + drbd_send_fp(tconn, &tconn->meta, P_HAND_SHAKE_M); } else { conn_err(tconn, "Logic error in drbd_connect()\n"); goto out_release_sockets; @@ -4195,7 +4195,7 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - err = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE, + err = _conn_send_cmd(tconn, 0, &tconn->data, P_HAND_SHAKE, &p->head, sizeof(*p), 0); mutex_unlock(&tconn->data.mutex); return err; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c3d1f5f5853..6e02b4efe9a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1215,7 +1215,7 @@ int w_send_barrier(struct drbd_work *w, int cancel) /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. 
*/ - err = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, + err = _drbd_send_cmd(mdev, &mdev->tconn->data, P_BARRIER, &p->head, sizeof(*p), 0); drbd_put_data_sock(mdev->tconn); From e5d6f33abe9da025d3d891367f93d084e8c74bf5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 28 Mar 2011 16:44:40 +0200 Subject: [PATCH 280/609] drbd: Change how the initial packets are called The first packets exchanged when a connection is established are referred to as P_HAND_SHAKE_S and P_HAND_SHAKE_M in the code, followed by P_HAND_SHAKE packets. To avoid confusion between these two unrelated things, call the initial packets P_INITIAL_DATA and P_INITIAL_META. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 8 ++++---- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a4ac5837054..089c1687bf3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -232,8 +232,8 @@ enum drbd_packet { /* special command ids for handshake */ - P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */ - P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */ + P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */ + P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */ P_HAND_SHAKE = 0xfffe /* FIXED for the next century! 
*/ }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4c94be25a77..a991d1c589f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3145,10 +3145,10 @@ const char *cmdname(enum drbd_packet cmd) [P_RETRY_WRITE] = "RetryWrite", }; - if (cmd == P_HAND_SHAKE_M) - return "HandShakeM"; - if (cmd == P_HAND_SHAKE_S) - return "HandShakeS"; + if (cmd == P_INITIAL_META) + return "InitialMeta"; + if (cmd == P_INITIAL_DATA) + return "InitialData"; if (cmd == P_HAND_SHAKE) return "HandShake"; if (cmd >= ARRAY_SIZE(cmdnames)) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 40cecd6c7e3..a5bf2b5042e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -833,10 +833,10 @@ static int drbd_connect(struct drbd_tconn *tconn) if (s) { if (!tconn->data.socket) { tconn->data.socket = s; - drbd_send_fp(tconn, &tconn->data, P_HAND_SHAKE_S); + drbd_send_fp(tconn, &tconn->data, P_INITIAL_DATA); } else if (!tconn->meta.socket) { tconn->meta.socket = s; - drbd_send_fp(tconn, &tconn->meta, P_HAND_SHAKE_M); + drbd_send_fp(tconn, &tconn->meta, P_INITIAL_META); } else { conn_err(tconn, "Logic error in drbd_connect()\n"); goto out_release_sockets; @@ -858,14 +858,14 @@ retry: drbd_socket_okay(&tconn->data.socket); drbd_socket_okay(&tconn->meta.socket); switch (try) { - case P_HAND_SHAKE_S: + case P_INITIAL_DATA: if (tconn->data.socket) { conn_warn(tconn, "initial packet S crossed\n"); sock_release(tconn->data.socket); } tconn->data.socket = s; break; - case P_HAND_SHAKE_M: + case P_INITIAL_META: if (tconn->meta.socket) { conn_warn(tconn, "initial packet M crossed\n"); sock_release(tconn->meta.socket); From 6038178ebe29e6b5e4d519a5ac56653d156c90f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 28 Mar 2011 17:05:50 +0200 Subject: [PATCH 281/609] drbd: Change how the "handshake" packets are called Packets of type P_HAND_SHAKE define which 
protocol versions and features a node supports. For clarity, call those packets P_CONNECTION_FEATURES instead. (This does not determine the features that a specific drbd device supports, such as drbd protocol A, B, C.) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 26 +++++++++++++------------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 089c1687bf3..d886dc1fc40 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -235,7 +235,7 @@ enum drbd_packet { P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */ P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */ - P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ + P_CONNECTION_FEATURES = 0xfffe /* FIXED for the next century! */ }; extern const char *cmdname(enum drbd_packet cmd); @@ -374,14 +374,14 @@ struct p_block_req { /* * commands with their own struct for additional fields: - * P_HAND_SHAKE + * P_CONNECTION_FEATURES * P_BARRIER * P_BARRIER_ACK * P_SYNC_PARAM * ReportParams */ -struct p_handshake { +struct p_connection_features { struct p_header head; /* Note: vnr will be ignored */ u32 protocol_min; u32 feature_flags; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a991d1c589f..71e3470304d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2497,7 +2497,7 @@ int __init drbd_init(void) int err; BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); - BUILD_BUG_ON(sizeof(struct p_handshake) != 80); + BUILD_BUG_ON(sizeof(struct p_connection_features) != 80); if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR @@ -3149,8 +3149,8 @@ const char *cmdname(enum drbd_packet cmd) return "InitialMeta"; if (cmd == P_INITIAL_DATA) return 
"InitialData"; - if (cmd == P_HAND_SHAKE) - return "HandShake"; + if (cmd == P_CONNECTION_FEATURES) + return "ConnectionFeatures"; if (cmd >= ARRAY_SIZE(cmdnames)) return "Unknown"; return cmdnames[cmd]; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a5bf2b5042e..e52ae9becdc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -60,7 +60,7 @@ enum finish_epoch { FE_RECYCLED, }; -static int drbd_do_handshake(struct drbd_tconn *tconn); +static int drbd_do_features(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(int vnr, void *p, void *data); @@ -913,7 +913,7 @@ retry: /* NOT YET ... * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - * first set it to the P_HAND_SHAKE timeout, + * first set it to the P_CONNECTION_FEATURES timeout, * which we set to 4x the configured ping_timeout. */ sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10; @@ -928,7 +928,7 @@ retry: tconn->last_received = jiffies; - h = drbd_do_handshake(tconn); + h = drbd_do_features(tconn); if (h <= 0) return h; @@ -4176,10 +4176,10 @@ static int drbd_disconnected(int vnr, void *p, void *data) * * for now, they are expected to be zero, but ignored. */ -static int drbd_send_handshake(struct drbd_tconn *tconn) +static int drbd_send_features(struct drbd_tconn *tconn) { /* ASSERT current == mdev->tconn->receiver ... 
*/ - struct p_handshake *p = tconn->data.sbuf; + struct p_connection_features *p = tconn->data.sbuf; int err; if (mutex_lock_interruptible(&tconn->data.mutex)) { @@ -4195,7 +4195,7 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - err = _conn_send_cmd(tconn, 0, &tconn->data, P_HAND_SHAKE, + err = _conn_send_cmd(tconn, 0, &tconn->data, P_CONNECTION_FEATURES, &p->head, sizeof(*p), 0); mutex_unlock(&tconn->data.mutex); return err; @@ -4208,15 +4208,15 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) * -1 peer talks different language, * no point in trying again, please go standalone. */ -static int drbd_do_handshake(struct drbd_tconn *tconn) +static int drbd_do_features(struct drbd_tconn *tconn) { /* ASSERT current == tconn->receiver ... */ - struct p_handshake *p = tconn->data.rbuf; - const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); + struct p_connection_features *p = tconn->data.rbuf; + const int expect = sizeof(struct p_connection_features) - sizeof(struct p_header80); struct packet_info pi; int err; - err = drbd_send_handshake(tconn); + err = drbd_send_features(tconn); if (err) return 0; @@ -4224,14 +4224,14 @@ static int drbd_do_handshake(struct drbd_tconn *tconn) if (err) return 0; - if (pi.cmd != P_HAND_SHAKE) { - conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n", + if (pi.cmd != P_CONNECTION_FEATURES) { + conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); return -1; } if (pi.size != expect) { - conn_err(tconn, "expected HandShake length: %u, received: %u\n", + conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n", expect, pi.size); return -1; } From 85f103d88c8eb91755eb3c103e5ead2c9389e35e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 31 Mar 2011 12:06:48 +0200 Subject: [PATCH 282/609] drbd: 
introduce the "initialized" activity log transaction type So we can initialize a clean on disk activity log area, without the module complaining with loud assert messages because of checksum or magic value mismatches. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 586776195a7..90ebbbb3dc2 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -31,6 +31,11 @@ #include "drbd_int.h" #include "drbd_wrappers.h" + +enum al_transaction_types { + AL_TR_UPDATE = 0, + AL_TR_INITIALIZED = 0xffff +}; /* all fields on disc in big endian */ struct __packed al_transaction_on_disk { /* don't we all like magic */ @@ -44,7 +49,8 @@ struct __packed al_transaction_on_disk { __be32 crc32c; /* type of transaction, special transaction types like: - * purge-all, set-all-idle, set-all-active, ... to-be-defined */ + * purge-all, set-all-idle, set-all-active, ... 
to-be-defined + * see also enum al_transaction_types */ __be16 transaction_type; /* we currently allow only a few thousand extents, @@ -476,6 +482,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) int active_extents = 0; int transactions = 0; int found_valid = 0; + int found_initialized = 0; int from = 0; int to = 0; u32 from_tnr = 0; @@ -504,6 +511,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* invalid data in that block */ if (rv == 0) continue; + if (be16_to_cpu(b->transaction_type) == AL_TR_INITIALIZED) { + ++found_initialized; + continue; + } /* IO error */ if (rv == -1) { @@ -535,7 +546,8 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } if (!found_valid) { - dev_warn(DEV, "No usable activity log found.\n"); + if (found_initialized != mx) + dev_warn(DEV, "No usable activity log found.\n"); mutex_unlock(&mdev->md_io_mutex); return 1; } From 181286ad22bf9bfb85de625e8501285de5261b35 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 31 Mar 2011 15:18:56 +0200 Subject: [PATCH 283/609] drbd: preparation commit, pass drbd_interval to drbd_al_begin/complete_io We want to avoid bio_split for bios crossing activity log boundaries. So we may need to activate two activity log extents "atomically". drbd_al_begin_io() needs to know more than just the start sector. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 8 ++++---- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_req.c | 12 ++++++------ drivers/block/drbd/drbd_worker.c | 10 +++++----- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 90ebbbb3dc2..5f0eeb74a57 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -205,9 +205,9 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) return al_ext; } -void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) +void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) { - unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); + unsigned int enr = (i->sector >> (AL_EXTENT_SHIFT-9)); struct lc_element *al_ext; struct update_al_work al_work; @@ -254,9 +254,9 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) } } -void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) +void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) { - unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); + unsigned int enr = (i->sector >> (AL_EXTENT_SHIFT-9)); struct lc_element *extent; unsigned long flags; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d886dc1fc40..8f43a366b82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1584,8 +1584,8 @@ extern const char *drbd_conn_str(enum drbd_conns s); extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ -extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); -extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i); +extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i); extern 
void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e52ae9becdc..c456a141eee 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2061,7 +2061,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size); peer_req->flags |= EE_CALL_AL_COMPLETE_IO; peer_req->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(mdev, peer_req->i.sector); + drbd_al_begin_io(mdev, &peer_req->i); } err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR); @@ -2075,7 +2075,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) - drbd_al_complete_io(mdev, peer_req->i.sector); + drbd_al_complete_io(mdev, &peer_req->i); out_interrupted: drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e380ffb7f4f..4406d829800 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -128,12 +128,12 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const if (s & RQ_LOCAL_MASK) { if (get_ldev_if_state(mdev, D_FAILED)) { if (s & RQ_IN_ACT_LOG) - drbd_al_complete_io(mdev, req->i.sector); + drbd_al_complete_io(mdev, &req->i); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { - dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " - "but my Disk seems to have failed :(\n", - (unsigned long long) req->i.sector); + dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), " + "but my Disk seems to 
have failed :(\n", + (unsigned long long) req->i.sector, req->i.size); } } } @@ -782,7 +782,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s * of transactional on-disk meta data updates. */ if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) { req->rq_state |= RQ_IN_ACT_LOG; - drbd_al_begin_io(mdev, sector); + drbd_al_begin_io(mdev, &req->i); } remote = remote && drbd_should_do_remote(mdev->state); @@ -979,7 +979,7 @@ allocate_barrier: fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) - drbd_al_complete_io(mdev, sector); + drbd_al_complete_io(mdev, &req->i); fail_and_free_req: if (local) { bio_put(req->private_bio); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6e02b4efe9a..ab73aa2fb4e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -101,7 +101,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel { unsigned long flags = 0; struct drbd_conf *mdev = peer_req->w.mdev; - sector_t e_sector; + struct drbd_interval i; int do_wake; u64 block_id; int do_al_complete_io; @@ -110,7 +110,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel * we may no longer access it, * it may be freed/reused already! 
* (as soon as we release the req_lock) */ - e_sector = peer_req->i.sector; + i = peer_req->i; do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; block_id = peer_req->block_id; @@ -134,13 +134,13 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (block_id == ID_SYNCER) - drbd_rs_complete_io(mdev, e_sector); + drbd_rs_complete_io(mdev, i.sector); if (do_wake) wake_up(&mdev->ee_wait); if (do_al_complete_io) - drbd_al_complete_io(mdev, e_sector); + drbd_al_complete_io(mdev, &i); wake_asender(mdev->tconn); put_ldev(mdev); @@ -1301,7 +1301,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) struct drbd_conf *mdev = w->mdev; if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) - drbd_al_begin_io(mdev, req->i.sector); + drbd_al_begin_io(mdev, &req->i); /* Calling drbd_al_begin_io() out of the worker might deadlocks theoretically. Practically it can not deadlock, since this is only used when unfreezing IOs. 
All the extents of the requests From 7726547e67a1fda0d12e1de5ec917a2e5d4b8186 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 31 Mar 2011 16:00:51 +0200 Subject: [PATCH 284/609] drbd: prepare to activate two activity log extents at once Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 5f0eeb74a57..07f222cae98 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -207,15 +207,22 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) { - unsigned int enr = (i->sector >> (AL_EXTENT_SHIFT-9)); - struct lc_element *al_ext; + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned int enr[2]; + struct lc_element *al_ext[2] = { NULL, NULL }; struct update_al_work al_work; D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr))); + enr[0] = i->sector >> (AL_EXTENT_SHIFT-9); + enr[1] = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + wait_event(mdev->al_wait, (al_ext[0] = _al_get(mdev, enr[0]))); + if (enr[0] != enr[1]) + wait_event(mdev->al_wait, (al_ext[1] = _al_get(mdev, enr[1]))); - if (al_ext->lc_number != enr) { + if (al_ext[0]->lc_number != enr[0] || + (al_ext[1] && al_ext[1]->lc_number != enr[1])) { /* drbd_al_write_transaction(mdev,al_ext,enr); * recurses into generic_make_request(), which * disallows recursion, bios being serialized on the @@ -232,7 +239,8 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. 
*/ - if (al_ext->lc_number != enr) { + if (al_ext[0]->lc_number != enr[0] || + (al_ext[1] && al_ext[1]->lc_number != enr[1])) { init_completion(&al_work.event); al_work.w.cb = w_al_write_transaction; al_work.w.mdev = mdev; From 23361cf32b58efdf09945a64e1d8d41fa6117157 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 31 Mar 2011 16:36:43 +0200 Subject: [PATCH 285/609] drbd: get rid of bio_split, allow bios of "arbitrary" size Where "arbitrary" size is currently 1 MiB, which is the BIO_MAX_SIZE for architectures with 4k PAGE_CACHE_SIZE (most). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 20 +++++--- drivers/block/drbd/drbd_req.c | 90 +++++------------------------------ 2 files changed, 27 insertions(+), 83 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8f43a366b82..2dbffb3b548 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1326,8 +1326,16 @@ struct bm_extent { #endif #endif -#define HT_SHIFT 8 -#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) +/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE, + * so for typical PAGE_CACHE_SIZE of 4k, that is (1<<20) Byte. + * Since we may live in a mixed-platform cluster, + * we limit us to a platform agnostic constant here for now. + * A followup commit may allow even bigger BIO sizes, + * once we thought that through. 
*/ +#define DRBD_MAX_BIO_SIZE (1 << 20) +#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE +#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE +#endif #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ @@ -2231,20 +2239,20 @@ static inline bool may_inc_ap_bio(struct drbd_conf *mdev) return true; } -static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count) +static inline bool inc_ap_bio_cond(struct drbd_conf *mdev) { bool rv = false; spin_lock_irq(&mdev->tconn->req_lock); rv = may_inc_ap_bio(mdev); if (rv) - atomic_add(count, &mdev->ap_bio_cnt); + atomic_inc(&mdev->ap_bio_cnt); spin_unlock_irq(&mdev->tconn->req_lock); return rv; } -static inline void inc_ap_bio(struct drbd_conf *mdev, int count) +static inline void inc_ap_bio(struct drbd_conf *mdev) { /* we wait here * as long as the device is suspended @@ -2254,7 +2262,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count) * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. 
*/ - wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count)); + wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev)); } static inline void dec_ap_bio(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 4406d829800..6e0e3bb3316 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -998,7 +998,6 @@ fail_and_free_req: int drbd_make_request(struct request_queue *q, struct bio *bio) { - unsigned int s_enr, e_enr; struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; unsigned long start_time; @@ -1010,93 +1009,30 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) D_ASSERT(bio->bi_size > 0); D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); - /* to make some things easier, force alignment of requests within the - * granularity of our hash tables */ - s_enr = bio->bi_sector >> HT_SHIFT; - e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; - - if (likely(s_enr == e_enr)) { - inc_ap_bio(mdev, 1); - return __drbd_make_request(mdev, bio, start_time); - } - - /* can this bio be split generically? - * Maybe add our own split-arbitrary-bios function. */ - if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) { - /* rather error out here than BUG in bio_split */ - dev_err(DEV, "bio would need to, but cannot, be split: " - "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", - bio->bi_vcnt, bio->bi_idx, bio->bi_size, - (unsigned long long)bio->bi_sector); - bio_endio(bio, -EINVAL); - } else { - /* This bio crosses some boundary, so we have to split it. */ - struct bio_pair *bp; - /* works for the "do not cross hash slot boundaries" case - * e.g. 
sector 262269, size 4096 - * s_enr = 262269 >> 6 = 4097 - * e_enr = (262269+8-1) >> 6 = 4098 - * HT_SHIFT = 6 - * sps = 64, mask = 63 - * first_sectors = 64 - (262269 & 63) = 3 - */ - const sector_t sect = bio->bi_sector; - const int sps = 1 << HT_SHIFT; /* sectors per slot */ - const int mask = sps - 1; - const sector_t first_sectors = sps - (sect & mask); - bp = bio_split(bio, first_sectors); - - /* we need to get a "reference count" (ap_bio_cnt) - * to avoid races with the disconnect/reconnect/suspend code. - * In case we need to split the bio here, we need to get three references - * atomically, otherwise we might deadlock when trying to submit the - * second one! */ - inc_ap_bio(mdev, 3); - - D_ASSERT(e_enr == s_enr + 1); - - while (__drbd_make_request(mdev, &bp->bio1, start_time)) - inc_ap_bio(mdev, 1); - - while (__drbd_make_request(mdev, &bp->bio2, start_time)) - inc_ap_bio(mdev, 1); - - dec_ap_bio(mdev); - - bio_pair_release(bp); - } - return 0; + inc_ap_bio(mdev); + return __drbd_make_request(mdev, bio, start_time); } -/* This is called by bio_add_page(). With this function we reduce - * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs - * units (was AL_EXTENTs). +/* This is called by bio_add_page(). * - * we do the calculation within the lower 32bit of the byte offsets, - * since we don't care for actual offset, but only check whether it - * would cross "activity log extent" boundaries. + * q->max_hw_sectors and other global limits are already enforced there. + * + * We need to call down to our lower level device, + * in case it has special restrictions. + * + * We also may need to enforce configured max-bio-bvecs limits. * * As long as the BIO is empty we have to allow at least one bvec, - * regardless of size and offset. so the resulting bio may still - * cross extent boundaries. those are dealt with (bio_split) in - * drbd_make_request. + * regardless of size and offset, so no need to ask lower levels. 
*/ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) { struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; - unsigned int bio_offset = - (unsigned int)bvm->bi_sector << 9; /* 32 bit */ unsigned int bio_size = bvm->bi_size; - int limit, backing_limit; + int limit = DRBD_MAX_BIO_SIZE; + int backing_limit; - limit = DRBD_MAX_BIO_SIZE - - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size); - if (limit < 0) - limit = 0; - if (bio_size == 0) { - if (limit <= bvec->bv_len) - limit = bvec->bv_len; - } else if (limit && get_ldev(mdev)) { + if (bio_size && get_ldev(mdev)) { struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; if (b->merge_bvec_fn) { From e15766e9c94f7fa3396eff4ffbbf30dea8c0e22a Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 1 Apr 2011 10:38:30 +0200 Subject: [PATCH 286/609] drbd: improvements to activate/deactivate multiple activity log extents The recent commit 'drbd: get rid of bio_split, allow bios of "arbitrary" size' had a reference count leak: it only deactivated the first of several activity log extents for intervals crossing extent boundaries. This commit generalizes drbd_al_begin_io to bios spanning multiple activity log extents, and adds the necessary loop around lc_put in drbd_al_complete_io as well.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 07f222cae98..50b851e389e 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -209,20 +209,16 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ - unsigned int enr[2]; - struct lc_element *al_ext[2] = { NULL, NULL }; - struct update_al_work al_work; + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned enr; D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - enr[0] = i->sector >> (AL_EXTENT_SHIFT-9); - enr[1] = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - wait_event(mdev->al_wait, (al_ext[0] = _al_get(mdev, enr[0]))); - if (enr[0] != enr[1]) - wait_event(mdev->al_wait, (al_ext[1] = _al_get(mdev, enr[1]))); + for (enr = first; enr <= last; enr++) + wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL); - if (al_ext[0]->lc_number != enr[0] || - (al_ext[1] && al_ext[1]->lc_number != enr[1])) { + if (mdev->act_log->pending_changes) { /* drbd_al_write_transaction(mdev,al_ext,enr); * recurses into generic_make_request(), which * disallows recursion, bios being serialized on the @@ -239,8 +235,8 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. 
*/ - if (al_ext[0]->lc_number != enr[0] || - (al_ext[1] && al_ext[1]->lc_number != enr[1])) { + if (mdev->act_log->pending_changes) { + struct update_al_work al_work; init_completion(&al_work.event); al_work.w.cb = w_al_write_transaction; al_work.w.mdev = mdev; @@ -264,24 +260,28 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) { - unsigned int enr = (i->sector >> (AL_EXTENT_SHIFT-9)); + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned enr; struct lc_element *extent; unsigned long flags; + bool wake = false; spin_lock_irqsave(&mdev->al_lock, flags); - extent = lc_find(mdev->act_log, enr); - - if (!extent) { - spin_unlock_irqrestore(&mdev->al_lock, flags); - dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); - return; + for (enr = first; enr <= last; enr++) { + extent = lc_find(mdev->act_log, enr); + if (!extent) { + dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); + continue; + } + if (lc_put(mdev->act_log, extent) == 0) + wake = true; } - - if (lc_put(mdev->act_log, extent) == 0) - wake_up(&mdev->al_wait); - spin_unlock_irqrestore(&mdev->al_lock, flags); + wake_up(&mdev->al_wait); } #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) From 78bae59b1b7bc06c84e292e9ecf42c013723e057 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 28 Mar 2011 15:40:12 +0200 Subject: [PATCH 287/609] drbd: Introduced drbd_read_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 +++++++++ drivers/block/drbd/drbd_nl.c | 5 ++--- drivers/block/drbd/drbd_receiver.c | 6 +++--- drivers/block/drbd/drbd_state.c | 15 +++++++-------- drivers/block/drbd/drbd_state.h | 6 +++--- 
drivers/block/drbd/drbd_worker.c | 6 +++--- 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2dbffb3b548..24aed86e736 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1682,6 +1682,15 @@ _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } +static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) +{ + union drbd_state rv; + + rv = mdev->state; + + return rv; +} + #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 85290a9beb6..75caac70aaa 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1503,8 +1503,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_suspend_al(mdev); /* IO is still suspended here... */ spin_lock_irq(&mdev->tconn->req_lock); - os = mdev->state; - ns.i = os.i; + os = drbd_read_state(mdev); + ns = os; /* If MDF_CONSISTENT is not set go into inconsistent state, otherwise investigate MDF_WasUpToDate... 
If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state, @@ -1546,7 +1546,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - ns = mdev->state; spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c456a141eee..28cb19c8e26 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3473,7 +3473,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) spin_lock_irq(&mdev->tconn->req_lock); retry: - os = ns = mdev->state; + os = ns = drbd_read_state(mdev); spin_unlock_irq(&mdev->tconn->req_lock); /* peer says his disk is uptodate, while we think it is inconsistent, @@ -3559,7 +3559,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.i != os.i) + if (os.i != drbd_read_state(mdev).i) goto retry; clear_bit(CONSIDER_RESYNC, &mdev->flags); ns.peer = peer_state.role; @@ -3581,7 +3581,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) return -EIO; } rv = _drbd_set_state(mdev, ns, cs_flags, NULL); - ns = mdev->state; + ns = drbd_read_state(mdev); spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ca77da38a0d..411d05f885e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -163,9 +163,8 @@ drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, enum drbd_state_rv rv; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - ns = apply_mask_val(mdev->state, mask, val); + ns = apply_mask_val(drbd_read_state(mdev), mask, val); rv = _drbd_set_state(mdev, ns, f, NULL); - ns = mdev->state; spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); return rv; @@ -198,7 +197,7 @@ _req_st_cond(struct drbd_conf 
*mdev, union drbd_state mask, return SS_CW_FAILED_BY_PEER; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; + os = drbd_read_state(mdev); ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv == SS_SUCCESS) @@ -244,7 +243,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, mutex_lock(mdev->state_mutex); spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; + os = drbd_read_state(mdev); ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv < SS_SUCCESS) { @@ -280,7 +279,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } spin_lock_irqsave(&mdev->tconn->req_lock, flags); - ns = apply_mask_val(mdev->state, mask, val); + ns = apply_mask_val(drbd_read_state(mdev), mask, val); rv = _drbd_set_state(mdev, ns, f, &done); } else { rv = _drbd_set_state(mdev, ns, f, &done); @@ -812,7 +811,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, const char *warn_sync_abort = NULL; struct after_state_chg_work *ascw; - os = mdev->state; + os = drbd_read_state(mdev); ns = sanitize_state(mdev, ns, &warn_sync_abort); if (ns.i == os.i) @@ -1430,7 +1429,7 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union int vnr; idr_for_each_entry(&tconn->volumes, mdev, vnr) { - os = mdev->state; + os = drbd_read_state(mdev); ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) @@ -1474,7 +1473,7 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state tconn->cstate = val.conn; idr_for_each_entry(&tconn->volumes, mdev, vnr) { - os = mdev->state; + os = drbd_read_state(mdev); ns = apply_mask_val(os, mask, val); ns = sanitize_state(mdev, ns, NULL); diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index c0331f18371..fdcfab9c1d8 100644 
--- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -48,12 +48,12 @@ struct drbd_tconn; val.T2 = (S2); val.T3 = (S3); val; }) #define _NS(D, T, S) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) + D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; }) #define _NS2(D, T1, S1, T2, S2) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \ __ns.T2 = (S2); __ns; }) #define _NS3(D, T1, S1, T2, S2, T3, S3) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \ __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) enum chg_state_flags { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ab73aa2fb4e..7350466ff30 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -781,7 +781,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ping_peer(mdev); spin_lock_irq(&mdev->tconn->req_lock); - os = mdev->state; + os = drbd_read_state(mdev); verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); @@ -1546,7 +1546,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } write_lock_irq(&global_state_lock); - ns = mdev->state; + ns = drbd_read_state(mdev); ns.aftr_isp = !_drbd_may_sync_now(mdev); @@ -1558,7 +1558,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) ns.pdsk = D_INCONSISTENT; r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - ns = mdev->state; + ns = drbd_read_state(mdev); if (ns.conn < C_CONNECTED) r = SS_UNKNOWN_ERROR; From 2aebfabb17ecc434623732896a5834a9cb82a82d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 28 Mar 2011 16:48:11 +0200 Subject: [PATCH 288/609] drbd: Renamed id_susp(union drbd_state s) to drbd_suspended(struct drbd_conf *) Signed-off-by: Philipp Reisner Signed-off-by: Lars 
Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_proc.c | 2 +- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_req.c | 6 +++--- drivers/block/drbd/drbd_state.c | 7 ++++++- 6 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 24aed86e736..cde547bdddf 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2217,16 +2217,16 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) return 1; } -static inline int is_susp(union drbd_state s) +static inline int drbd_suspended(struct drbd_conf *mdev) { - return s.susp || s.susp_nod || s.susp_fen; + return mdev->state.susp || mdev->state.susp_nod || mdev->state.susp_fen; } static inline bool may_inc_ap_bio(struct drbd_conf *mdev) { int mxb = drbd_get_max_buffers(mdev); - if (is_susp(mdev->state)) + if (drbd_suspended(mdev)) return false; if (test_bit(SUSPEND_IO, &mdev->flags)) return false; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 75caac70aaa..45a84fa660a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -731,7 +731,7 @@ char *ppsize(char *buf, unsigned long long size) void drbd_suspend_io(struct drbd_conf *mdev) { set_bit(SUSPEND_IO, &mdev->flags); - if (is_susp(mdev->state)) + if (drbd_suspended(mdev)) return; wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); } @@ -1355,7 +1355,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_suspend_io(mdev); /* also wait for the last barrier ack. 
*/ - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev)); /* and for any other previously queued work */ drbd_flush_workqueue(mdev); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 36c9a6cecdc..a4dbdbc52c1 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -250,7 +250,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) drbd_disk_str(mdev->state.pdsk), (mdev->tconn->net_conf == NULL ? ' ' : (mdev->tconn->net_conf->wire_protocol - DRBD_PROT_A+'A')), - is_susp(mdev->state) ? 's' : 'r', + drbd_suspended(mdev) ? 's' : 'r', mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', mdev->state.user_isp ? 'u' : '-', diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 28cb19c8e26..7bc7b098787 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3568,7 +3568,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); - if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && + if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this for temporal network outages! 
*/ @@ -4123,7 +4123,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) kfree(mdev->p_uuid); mdev->p_uuid = NULL; - if (!is_susp(mdev->state)) + if (!drbd_suspended(mdev)) tl_clear(mdev->tconn); drbd_md_sync(mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 6e0e3bb3316..e296d7ed6b8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -303,7 +303,7 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e { struct drbd_conf *mdev = req->w.mdev; - if (!is_susp(mdev->state)) + if (!drbd_suspended(mdev)) _req_may_be_done(req, m); } @@ -789,7 +789,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state); D_ASSERT(!(remote && send_oos)); - if (!(local || remote) && !is_susp(mdev->state)) { + if (!(local || remote) && !drbd_suspended(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); err = -EIO; @@ -830,7 +830,7 @@ allocate_barrier: } } - if (is_susp(mdev->state)) { + if (drbd_suspended(mdev)) { /* If we got suspended, use the retry mechanism of generic_make_request() to restart processing of this bio. 
In the next call to drbd_make_request diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 411d05f885e..ae4a76ce8c1 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -47,6 +47,11 @@ static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_st static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, const char **warn_sync_abort); +static inline bool is_susp(union drbd_state s) +{ + return s.susp || s.susp_nod || s.susp_fen; +} + bool conn_all_vols_unconf(struct drbd_tconn *tconn) { struct drbd_conf *mdev; @@ -1161,7 +1166,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - if (is_susp(mdev->state)) { + if (drbd_suspended(mdev)) { set_bit(NEW_CUR_UUID, &mdev->flags); } else { drbd_uuid_new_current(mdev); From 8e0af25fa85c9efe393128b0a0dd874981edb22f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 28 Mar 2011 16:18:39 +0200 Subject: [PATCH 289/609] drbd: Moved susp, susp_nod and susp_fen to the connection object Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +++++++++- drivers/block/drbd/drbd_state.c | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index cde547bdddf..f76b3932d1e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -862,6 +862,9 @@ struct drbd_tconn { /* is a resource from the config file */ struct list_head all_tconn; /* linked on global drbd_tconns */ struct idr volumes; /* to mdev mapping */ enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ + unsigned susp:1; /* IO suspended by user */ + unsigned susp_nod:1; /* IO suspended because no data */ + unsigned susp_fen:1; /* IO 
suspended because fence peer handler runs */ struct mutex cstate_mutex; /* Protects graceful disconnects */ unsigned long flags; @@ -1687,6 +1690,9 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) union drbd_state rv; rv = mdev->state; + rv.susp = mdev->tconn->susp; + rv.susp_nod = mdev->tconn->susp_nod; + rv.susp_fen = mdev->tconn->susp_fen; return rv; } @@ -2219,7 +2225,9 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) static inline int drbd_suspended(struct drbd_conf *mdev) { - return mdev->state.susp || mdev->state.susp_nod || mdev->state.susp_fen; + struct drbd_tconn *tconn = mdev->tconn; + + return tconn->susp || tconn->susp_fen || tconn->susp_nod; } static inline bool may_inc_ap_bio(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ae4a76ce8c1..902007c807e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -861,6 +861,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, atomic_inc(&mdev->local_cnt); mdev->state = ns; + mdev->tconn->susp = ns.susp; + mdev->tconn->susp_nod = ns.susp_nod; + mdev->tconn->susp_fen = ns.susp_fen; /* solve the race between becoming unconfigured, * worker doing the cleanup, and From da9fbc276e9d42638df68e8515d06750695612ca Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 29 Mar 2011 10:52:01 +0200 Subject: [PATCH 290/609] drbd: Introduced a new type union drbd_dev_state Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 5 +---- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_req.h | 4 ++-- drivers/block/drbd/drbd_state.c | 6 +++--- drivers/block/drbd/drbd_state.h | 34 +++++++++++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f76b3932d1e..aa42ccb5f54 100644 --- 
a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -935,7 +935,7 @@ struct drbd_conf { /* Used after attach while negotiating new disk state. */ union drbd_state new_state_tmp; - union drbd_state state; + union drbd_dev_state state; wait_queue_head_t misc_wait; wait_queue_head_t state_wait; /* upon each state change. */ unsigned int send_cnt; @@ -1689,7 +1689,7 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) { union drbd_state rv; - rv = mdev->state; + rv.i = mdev->state.i; rv.susp = mdev->tconn->susp; rv.susp_nod = mdev->tconn->susp_nod; rv.susp_fen = mdev->tconn->susp_fen; @@ -2155,7 +2155,7 @@ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) static inline int drbd_state_is_stable(struct drbd_conf *mdev) { - union drbd_state s = mdev->state; + union drbd_dev_state s = mdev->state; /* DO NOT add a default clause, we want the compiler to warn us * for any newly introduced state we may have forgotten to add here */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 71e3470304d..064680c7564 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1821,15 +1821,12 @@ static void drbd_set_defaults(struct drbd_conf *mdev) { /* Beware! 
The actual layout differs * between big endian and little endian */ - mdev->state = (union drbd_state) { + mdev->state = (union drbd_dev_state) { { .role = R_SECONDARY, .peer = R_UNKNOWN, .conn = C_STANDALONE, .disk = D_DISKLESS, .pdsk = D_UNKNOWN, - .susp = 0, - .susp_nod = 0, - .susp_fen = 0 } }; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 45a84fa660a..ed08dce9aaf 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1435,7 +1435,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) clear_bit(CRASHED_PRIMARY, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) { + !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) { set_bit(CRASHED_PRIMARY, &mdev->flags); cp_discovered = 1; } @@ -2348,7 +2348,7 @@ out: int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) { - union drbd_state s; + union drbd_dev_state s; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 7fb3e06369d..0dabfa9c82f 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -295,7 +295,7 @@ static inline int req_mod(struct drbd_request *req, return rv; } -static inline bool drbd_should_do_remote(union drbd_state s) +static inline bool drbd_should_do_remote(union drbd_dev_state s) { return s.pdsk == D_UP_TO_DATE || (s.pdsk >= D_INCONSISTENT && @@ -305,7 +305,7 @@ static inline bool drbd_should_do_remote(union drbd_state s) That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* states. */ } -static inline bool drbd_should_send_out_of_sync(union drbd_state s) +static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s) { return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; /* pdsk = D_INCONSISTENT as a consequence. 
Protocol 96 check not necessary diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 902007c807e..b6a14a30ba2 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -860,7 +860,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) atomic_inc(&mdev->local_cnt); - mdev->state = ns; + mdev->state.i = ns.i; mdev->tconn->susp = ns.susp; mdev->tconn->susp_nod = ns.susp_nod; mdev->tconn->susp_fen = ns.susp_fen; @@ -1393,7 +1393,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf) { enum chg_state_flags flags = ~0; - union drbd_state os, cs = {}; /* old_state, common_state */ + union drbd_dev_state os, cs = {}; /* old_state, common_state */ struct drbd_conf *mdev; int vnr, first_vol = 1; @@ -1424,7 +1424,7 @@ void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum *pf |= CS_DC_MASK; *pf &= flags; - *pcs = cs; + (*pcs).i = cs.i; } static enum drbd_state_rv diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index fdcfab9c1d8..0f8441de29c 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -72,6 +72,40 @@ enum chg_state_flags { CS_IGN_OUTD_FAIL = 1 << 10, }; +/* drbd_dev_state and drbd_state are different types. This is to stress the + small difference. There is no suspended flag (.susp), and no suspended + while fence handler runs flag (susp_fen). 
*/ +union drbd_dev_state { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + unsigned role:2 ; /* 3/4 primary/secondary/unknown */ + unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ + unsigned conn:5 ; /* 17/32 cstates */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned _unused:1 ; + unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ + unsigned peer_isp:1 ; + unsigned user_isp:1 ; + unsigned _pad:11; /* 0 unused */ +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned _pad:11; + unsigned user_isp:1 ; + unsigned peer_isp:1 ; + unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ + unsigned _unused:1 ; + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned conn:5 ; /* 17/32 cstates */ + unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ + unsigned role:2 ; /* 3/4 primary/secondary/unknown */ +#else +# error "this endianess is not supported" +#endif + }; + unsigned int i; +}; + extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, union drbd_state mask, From 5f082f98f5c8e7daee08505bcc4775aa82ad6d84 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 29 Mar 2011 13:20:58 +0200 Subject: [PATCH 291/609] drbd: Renamed nms to ns_max Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b6a14a30ba2..8ec5574ad2a 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1358,7 +1358,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, struct after_conn_state_chg_work { struct drbd_work w; enum drbd_conns oc; - union drbd_state nms; /* new, max state, over all mdevs */ + union drbd_state ns_max; /* new, max 
state, over all mdevs */ enum chg_state_flags flags; }; @@ -1376,12 +1376,12 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) container_of(w, struct after_conn_state_chg_work, w); struct drbd_tconn *tconn = w->tconn; enum drbd_conns oc = acscw->oc; - union drbd_state nms = acscw->nms; + union drbd_state ns_max = acscw->ns_max; kfree(acscw); /* Upon network configuration, we need to start the receiver */ - if (oc == C_STANDALONE && nms.conn == C_UNCONNECTED) + if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED) drbd_thread_start(&tconn->receiver); //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); @@ -1558,7 +1558,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ enum drbd_state_rv rv = SS_SUCCESS; struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; - union drbd_state ms, os; + union drbd_state ns_max, os; rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) @@ -1576,14 +1576,14 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ } conn_old_common_state(tconn, &os, &flags); - ms = conn_set_state(tconn, mask, val, flags); - ms.conn = val.conn; - conn_pr_state_change(tconn, os, ms, flags); + ns_max = conn_set_state(tconn, mask, val, flags); + ns_max.conn = val.conn; + conn_pr_state_change(tconn, os, ns_max, flags); acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); if (acscw) { acscw->oc = os.conn; - acscw->nms = ms; + acscw->ns_max = ns_max; acscw->flags = flags; acscw->w.cb = w_after_conn_state_ch; acscw->w.tconn = tconn; From 8c7e16c39ffe77438906ff9d6196a80d171e9e32 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 29 Mar 2011 14:01:02 +0200 Subject: [PATCH 292/609] drbd: Calculate and provide ns_min to the w_after_conn_state_ch() work Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 42 ++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git 
a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8ec5574ad2a..0ce665366d6 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1358,6 +1358,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, struct after_conn_state_chg_work { struct drbd_work w; enum drbd_conns oc; + union drbd_state ns_min; union drbd_state ns_max; /* new, max state, over all mdevs */ enum chg_state_flags flags; }; @@ -1468,11 +1469,17 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union return rv; } -static union drbd_state +void conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, - enum chg_state_flags flags) + union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) { - union drbd_state ns, os, ms = { }; + union drbd_state ns, os, ns_max = { }; + union drbd_state ns_min = { + { .role = R_MASK, + .peer = R_MASK, + .disk = D_MASK, + .pdsk = D_MASK + } }; struct drbd_conf *mdev; enum drbd_state_rv rv; int vnr; @@ -1492,13 +1499,26 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (rv < SS_SUCCESS) BUG(); - ms.role = max_role(mdev->state.role, ms.role); - ms.peer = max_role(mdev->state.peer, ms.peer); - ms.disk = max_t(enum drbd_disk_state, mdev->state.disk, ms.disk); - ms.pdsk = max_t(enum drbd_disk_state, mdev->state.pdsk, ms.pdsk); + ns.i = mdev->state.i; + ns_max.role = max_role(ns.role, ns_max.role); + ns_max.peer = max_role(ns.peer, ns_max.peer); + ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn); + ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk); + ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk); + + ns_min.role = min_role(ns.role, ns_min.role); + ns_min.peer = min_role(ns.peer, ns_min.peer); + ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn); + ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk); + ns_min.pdsk = 
min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk); } - return ms; + ns_min.susp = ns_max.susp = tconn->susp; + ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod; + ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen; + + *pns_min = ns_min; + *pns_max = ns_max; } static enum drbd_state_rv @@ -1558,7 +1578,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ enum drbd_state_rv rv = SS_SUCCESS; struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; - union drbd_state ns_max, os; + union drbd_state ns_max, ns_min, os; rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) @@ -1576,13 +1596,13 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ } conn_old_common_state(tconn, &os, &flags); - ns_max = conn_set_state(tconn, mask, val, flags); - ns_max.conn = val.conn; + conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags); conn_pr_state_change(tconn, os, ns_max, flags); acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); if (acscw) { acscw->oc = os.conn; + acscw->ns_min = ns_min; acscw->ns_max = ns_max; acscw->flags = flags; acscw->w.cb = w_after_conn_state_ch; From 19f83c76616a0c2112943b5af65483e16a1986da Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 29 Mar 2011 14:21:03 +0200 Subject: [PATCH 293/609] drbd: Implemented conn_lowest_conn() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_state.c | 12 ++++++++++++ drivers/block/drbd/drbd_state.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ed08dce9aaf..5cf116471aa 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1590,7 +1590,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) static int adm_detach(struct drbd_conf *mdev) { - enum drbd_ret_code retcode; + enum drbd_state_rv retcode; drbd_suspend_io(mdev); 
/* so no-one is stuck in drbd_al_begin_io */ retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); wait_event(mdev->misc_wait, diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0ce665366d6..36703fee3b0 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -133,6 +133,18 @@ enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) return ds; } +enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn) +{ + enum drbd_conns conn = C_MASK; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + conn = min_t(enum drbd_conns, conn, mdev->state.conn); + + return conn; +} + /** * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 0f8441de29c..286af0612dc 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -154,5 +154,6 @@ enum drbd_role conn_highest_role(struct drbd_tconn *tconn); enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); +enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn); #endif From 4669265a7bb146ae072951e26c8527fdc733eff6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 29 Mar 2011 18:15:49 +0200 Subject: [PATCH 294/609] drbd: Implemented conn_lowest_disk() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 12 ++++++++++++ drivers/block/drbd/drbd_state.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 36703fee3b0..896d5dc833c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -121,6 +121,18 @@ enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) return ds; } +enum 
drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_MASK; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = min_t(enum drbd_disk_state, ds, mdev->state.disk); + + return ds; +} + enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) { enum drbd_disk_state ds = D_DISKLESS; diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 286af0612dc..757f9d93643 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -153,6 +153,7 @@ static inline int drbd_request_state(struct drbd_conf *mdev, enum drbd_role conn_highest_role(struct drbd_tconn *tconn); enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); +enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn); enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn); From a6d00c8ec3614ee0cc365b35a76cc0b705f4c998 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 29 Mar 2011 18:16:11 +0200 Subject: [PATCH 295/609] drbd: Implemented IO thawing for multiple volumes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 72 ++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 896d5dc833c..06bbfc772fe 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1088,8 +1088,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { enum drbd_fencing_p fp; - enum drbd_req_event what = NOTHING; - union drbd_state nsm; struct sib_info sib; sib.sib_reason = SIB_STATE_CHANGE; @@ -1118,44 +1116,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Here we 
have the actions that are performed after a state change. This function might sleep */ - nsm.i = -1; if (ns.susp_nod) { - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + enum drbd_req_event what = NOTHING; + + if (os.conn < C_CONNECTED && conn_lowest_conn(mdev->tconn) >= C_CONNECTED) what = RESEND; - if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) + if (os.disk == D_ATTACHING && conn_lowest_disk(mdev->tconn) > D_ATTACHING) what = RESTART_FROZEN_DISK_IO; - if (what != NOTHING) - nsm.susp_nod = 0; - } - - if (ns.susp_fen) { - /* case1: The outdate peer handler is successful: */ - if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - tl_clear(mdev->tconn); - if (test_bit(NEW_CUR_UUID, &mdev->flags)) { - drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); - } + if (what != NOTHING) { spin_lock_irq(&mdev->tconn->req_lock); - _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); + _tl_restart(mdev->tconn, what); + _drbd_set_state(_NS(mdev, susp_nod, 0), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->tconn->req_lock); } - /* case2: The connection was established again: */ - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { - clear_bit(NEW_CUR_UUID, &mdev->flags); - what = RESEND; - nsm.susp_fen = 0; - } - } - - if (what != NOTHING) { - spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev->tconn, what); - nsm.i &= mdev->state.i; - _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); } /* Became sync source. 
With protocol >= 96, we still need to send out @@ -1402,6 +1377,9 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) struct drbd_tconn *tconn = w->tconn; enum drbd_conns oc = acscw->oc; union drbd_state ns_max = acscw->ns_max; + union drbd_state ns_min = acscw->ns_min; + struct drbd_conf *mdev; + int vnr; kfree(acscw); @@ -1409,6 +1387,36 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED) drbd_thread_start(&tconn->receiver); + if (ns_max.susp_fen) { + /* case1: The outdate peer handler is successful: */ + if (ns_max.pdsk <= D_OUTDATED) { + tl_clear(tconn); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (test_bit(NEW_CUR_UUID, &mdev->flags)) { + drbd_uuid_new_current(mdev); + clear_bit(NEW_CUR_UUID, &mdev->flags); + } + } + conn_request_state(tconn, + (union drbd_state) { { .susp_fen = 1 } }, + (union drbd_state) { { .susp_fen = 0 } }, + CS_VERBOSE); + } + /* case2: The connection was established again: */ + if (ns_min.conn >= C_CONNECTED) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) + clear_bit(NEW_CUR_UUID, &mdev->flags); + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, RESEND); + _conn_request_state(tconn, + (union drbd_state) { { .susp_fen = 1 } }, + (union drbd_state) { { .susp_fen = 0 } }, + CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); + } + } + + //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); after_all_state_ch(tconn); From 706cb24c239bee5442c0b046abc7bf12f1aa0579 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 29 Mar 2011 15:20:27 +0200 Subject: [PATCH 296/609] drbd: Improved logging of state changes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 44 ++++++++++++++++++++++----------- drivers/block/drbd/drbd_state.h | 3 ++- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 
06bbfc772fe..c479577923c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -384,6 +384,7 @@ static long print_state_change(char *pb, union drbd_state os, union drbd_state n char *pbp; pbp = pb; *pbp = 0; + if (ns.role != os.role && flags & CS_DC_ROLE) pbp += sprintf(pbp, "role( %s -> %s ) ", drbd_role_str(os.role), @@ -404,10 +405,18 @@ static long print_state_change(char *pb, union drbd_state os, union drbd_state n pbp += sprintf(pbp, "pdsk( %s -> %s ) ", drbd_disk_str(os.pdsk), drbd_disk_str(ns.pdsk)); - if (is_susp(ns) != is_susp(os)) - pbp += sprintf(pbp, "susp( %d -> %d ) ", - is_susp(os), - is_susp(ns)); + + return pbp - pb; +} + +static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char pb[300]; + char *pbp = pb; + + pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK); + if (ns.aftr_isp != os.aftr_isp) pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", os.aftr_isp, @@ -421,15 +430,7 @@ static long print_state_change(char *pb, union drbd_state os, union drbd_state n os.user_isp, ns.user_isp); - return pbp - pb; -} - -static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, - enum chg_state_flags flags) -{ - char pb[300]; - - if (print_state_change(pb, os, ns, flags ^ CS_DC_MASK)) + if (pbp != pb) dev_info(DEV, "%s\n", pb); } @@ -437,8 +438,16 @@ static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, enum chg_state_flags flags) { char pb[300]; + char *pbp = pb; - if (print_state_change(pb, os, ns, flags)) + pbp += print_state_change(pbp, os, ns, flags); + + if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP) + pbp += sprintf(pbp, "susp( %d -> %d ) ", + is_susp(os), + is_susp(ns)); + + if (pbp != pb) conn_info(tconn, "%s\n", pb); } @@ -876,6 +885,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, drbd_pr_state_change(mdev, os, ns, flags); + /* Display changes 
to the susp* flags that were caused by the call to + sanitize_state(). Only display it here if we were not called from + _conn_request_state() */ + if (!(flags & CS_DC_SUSP)) + conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP); + /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference * on the ldev here, to be sure the transition -> D_DISKLESS resp. * drbd_ldev_destroy() won't happen before our corresponding @@ -1628,6 +1643,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ } conn_old_common_state(tconn, &os, &flags); + flags |= CS_DC_SUSP; conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags); conn_pr_state_change(tconn, os, ns_max, flags); diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 757f9d93643..a3c361bbc4b 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -68,8 +68,9 @@ enum chg_state_flags { CS_DC_CONN = 1 << 7, CS_DC_DISK = 1 << 8, CS_DC_PDSK = 1 << 9, + CS_DC_SUSP = 1 << 10, CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK, - CS_IGN_OUTD_FAIL = 1 << 10, + CS_IGN_OUTD_FAIL = 1 << 11, }; /* drbd_dev_state and drbd_state are different types. 
This is to stress the From 8b924f1d63ca969581afcbb198237643a38ecd0f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 1 Mar 2011 11:08:28 +0100 Subject: [PATCH 297/609] drbd: Use tconn in request_timer_fn() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 1 + drivers/block/drbd/drbd_req.c | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7bc7b098787..9ad8dd3a848 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -794,6 +794,7 @@ int drbd_connected(int vnr, void *p, void *data) err = drbd_send_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); + mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ return err; } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e296d7ed6b8..fd0b4529a4b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1047,21 +1047,22 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct void request_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; + struct drbd_tconn *tconn = mdev->tconn; struct drbd_request *req; /* oldest request */ struct list_head *le; unsigned long et = 0; /* effective timeout = ko_count * timeout */ - if (get_net_conf(mdev->tconn)) { - et = mdev->tconn->net_conf->timeout*HZ/10 * mdev->tconn->net_conf->ko_count; - put_net_conf(mdev->tconn); + if (get_net_conf(tconn)) { + et = tconn->net_conf->timeout*HZ/10 * tconn->net_conf->ko_count; + put_net_conf(tconn); } if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) return; /* Recurring timer stopped */ - spin_lock_irq(&mdev->tconn->req_lock); - le = &mdev->tconn->oldest_tle->requests; + spin_lock_irq(&tconn->req_lock); + le = &tconn->oldest_tle->requests; if (list_empty(le)) { - 
spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); mod_timer(&mdev->request_timer, jiffies + et); return; } @@ -1080,5 +1081,5 @@ void request_timer_fn(unsigned long data) mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); } From a17647aae4bcb272ad98ebe9de694a1b87b84989 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 1 Apr 2011 12:49:42 +0200 Subject: [PATCH 298/609] drbd: drbd_send_ping(), drbd_send_ping_ack(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 064680c7564..7e766ff94e1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -768,13 +768,13 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, int drbd_send_ping(struct drbd_tconn *tconn) { struct p_header h; - return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING, &h, sizeof(h)); + return conn_send_cmd(tconn, 0, &tconn->meta, P_PING, &h, sizeof(h)); } int drbd_send_ping_ack(struct drbd_tconn *tconn) { struct p_header h; - return !conn_send_cmd(tconn, 0, &tconn->meta, P_PING_ACK, &h, sizeof(h)); + return conn_send_cmd(tconn, 0, &tconn->meta, P_PING_ACK, &h, sizeof(h)); } int drbd_send_sync_param(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9ad8dd3a848..61104dbb017 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4498,7 +4498,7 @@ static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi) { - return drbd_send_ping_ack(tconn); + return 
!drbd_send_ping_ack(tconn); } @@ -4851,7 +4851,7 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); if (test_and_clear_bit(SEND_PING, &tconn->flags)) { - if (!drbd_send_ping(tconn)) { + if (drbd_send_ping(tconn)) { conn_err(tconn, "drbd_send_ping has failed\n"); goto reconnect; } From dba58587506818211b042c7b6a37f73e0feb8862 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 29 Mar 2011 16:55:40 +0200 Subject: [PATCH 299/609] drbd: Introduce new primitives for sending commands Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 +++++ drivers/block/drbd/drbd_main.c | 64 ++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index aa42ccb5f54..632ca9aecef 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1903,6 +1903,15 @@ static inline int drbd_send_short_cmd(struct drbd_conf *mdev, return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, sizeof(h)); } +extern void *conn_prepare_command(struct drbd_tconn *, struct drbd_socket *); +extern void *drbd_prepare_command(struct drbd_conf *, struct drbd_socket *); +extern int conn_send_command(struct drbd_tconn *, struct drbd_socket *, + enum drbd_packet, unsigned int, void *, + unsigned int); +extern int drbd_send_command(struct drbd_conf *, struct drbd_socket *, + enum drbd_packet, unsigned int, void *, + unsigned int); + extern int drbd_send_ping(struct drbd_tconn *tconn); extern int drbd_send_ping_ack(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7e766ff94e1..699ab11256b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -765,6 +765,70 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, return err; } +void *conn_prepare_command(struct drbd_tconn *tconn, 
struct drbd_socket *sock) +{ + mutex_lock(&sock->mutex); + if (!sock->socket) { + mutex_unlock(&sock->mutex); + return NULL; + } + return sock->sbuf; +} + +void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock) +{ + return conn_prepare_command(mdev->tconn, sock); +} + +static int __send_command(struct drbd_tconn *tconn, int vnr, + struct drbd_socket *sock, enum drbd_packet cmd, + unsigned int header_size, void *data, + unsigned int size) +{ + int msg_flags; + int err; + + /* + * Called with @data == NULL and the size of the data blocks in @size + * for commands that send data blocks. For those commands, omit the + * MSG_MORE flag: this will increase the likelihood that data blocks + * which are page aligned on the sender will end up page aligned on the + * receiver. + */ + msg_flags = data ? MSG_MORE : 0; + + _prepare_header(tconn, vnr, sock->sbuf, cmd, + header_size - sizeof(struct p_header) + size); + err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size, + msg_flags); + if (data && !err) + err = drbd_send_all(tconn, sock->socket, data, size, 0); + return err; +} + +int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock, + enum drbd_packet cmd, unsigned int header_size, + void *data, unsigned int size) +{ + int err; + + err = __send_command(tconn, 0, sock, cmd, header_size, data, size); + mutex_unlock(&sock->mutex); + return err; +} + +int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock, + enum drbd_packet cmd, unsigned int header_size, + void *data, unsigned int size) +{ + int err; + + err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size, + data, size); + mutex_unlock(&sock->mutex); + return err; +} + int drbd_send_ping(struct drbd_tconn *tconn) { struct p_header h; From 52b061a44021ca11ee2fd238040e91341ff8066d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Mar 2011 11:38:49 +0200 Subject: [PATCH 300/609] drbd: Introduce drbd_header_size() Signed-off-by: 
Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_main.c | 14 ++++++++++++++ drivers/block/drbd/drbd_receiver.c | 9 +++++---- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 632ca9aecef..729c96fc0c8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -315,6 +315,8 @@ struct p_header { u8 payload[0]; }; +extern unsigned int drbd_header_size(struct drbd_tconn *tconn); + /* * short commands, packets without payload, plain p_header: * P_PING diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 699ab11256b..606a9ecbe04 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -689,6 +689,20 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) } #endif +/** + * drbd_header_size - size of a packet header + * + * The header size is a multiple of 8, so any payload following the header is + * word aligned on 64-bit architectures. (The bitmap send and receive code + * relies on this.) 
+ */ +unsigned int drbd_header_size(struct drbd_tconn *tconn) +{ + BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); + return sizeof(struct p_header80); +} + static void prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) { h->magic = cpu_to_be32(DRBD_MAGIC); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 61104dbb017..e52a929d9ed 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -995,7 +995,7 @@ static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) struct p_header *h = tconn->data.rbuf; int err; - err = drbd_recv_all_warn(tconn, h, sizeof(*h)); + err = drbd_recv_all_warn(tconn, h, drbd_header_size(tconn)); if (err) return err; @@ -4842,7 +4842,8 @@ int drbd_asender(struct drbd_thread *thi) int rv; void *buf = h; int received = 0; - int expect = sizeof(struct p_header); + unsigned int header_size = drbd_header_size(tconn); + int expect = header_size; int ping_timeout_active = 0; current->policy = SCHED_RR; /* Make this a realtime task! */ @@ -4926,7 +4927,7 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - if (pi.size != expect - sizeof(struct p_header)) { + if (pi.size != expect - header_size) { conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n", pi.cmd, pi.size); goto reconnect; @@ -4950,7 +4951,7 @@ int drbd_asender(struct drbd_thread *thi) buf = h; received = 0; - expect = sizeof(struct p_header); + expect = header_size; cmd = NULL; } } From 9f5bdc339e3becd85aa8add305d794b0b1ec8996 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 28 Mar 2011 14:23:08 +0200 Subject: [PATCH 301/609] drbd: Replace and remove old primitives Centralize sock->mutex locking and unlocking in [drbd|conn]_prepare_command() and [drbd|conn]_send_command(). 
Therefore all *_send_* functions are touched to use these primitives instead of drbd_get_data_sock()/drbd_put_data_sock() and former helper functions. That change makes the *_send_* functions more standardized. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 60 +--- drivers/block/drbd/drbd_main.c | 524 ++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 90 +++-- drivers/block/drbd/drbd_worker.c | 24 +- 4 files changed, 322 insertions(+), 376 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 729c96fc0c8..663f7b61175 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1060,22 +1060,6 @@ static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr) return (struct drbd_conf *)idr_find(&tconn->volumes, vnr); } -static inline int drbd_get_data_sock(struct drbd_tconn *tconn) -{ - mutex_lock(&tconn->data.mutex); - if (!tconn->data.socket) { - /* Disconnected. 
*/ - mutex_unlock(&tconn->data.mutex); - return -EIO; - } - return 0; -} - -static inline void drbd_put_data_sock(struct drbd_tconn *tconn) -{ - mutex_unlock(&tconn->data.mutex); -} - /* * function declarations *************************/ @@ -1118,13 +1102,6 @@ extern int _conn_send_state_req(struct drbd_tconn *, int vnr, enum drbd_packet c union drbd_state, union drbd_state); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); -extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, - enum drbd_packet cmd, struct p_header *h, size_t size, - unsigned msg_flags); -extern int conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, - enum drbd_packet cmd, struct p_header *h, size_t size); -extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, - char *data, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev); extern void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); @@ -1149,7 +1126,7 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) extern int drbd_send_bitmap(struct drbd_conf *mdev); extern void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); -extern int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); +extern void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); void drbd_print_uuids(struct drbd_conf *mdev, const char *text); @@ -1885,26 +1862,6 @@ static inline void request_ping(struct drbd_tconn *tconn) wake_asender(tconn); } -static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct drbd_socket *sock, - enum drbd_packet cmd, struct p_header *h, size_t size, - unsigned msg_flags) -{ - return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); -} - -static inline 
int drbd_send_cmd(struct drbd_conf *mdev, struct drbd_socket *sock, - enum drbd_packet cmd, struct p_header *h, size_t size) -{ - return conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size); -} - -static inline int drbd_send_short_cmd(struct drbd_conf *mdev, - enum drbd_packet cmd) -{ - struct p_header h; - return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &h, sizeof(h)); -} - extern void *conn_prepare_command(struct drbd_tconn *, struct drbd_socket *); extern void *drbd_prepare_command(struct drbd_conf *, struct drbd_socket *); extern int conn_send_command(struct drbd_tconn *, struct drbd_socket *, @@ -1916,19 +1873,8 @@ extern int drbd_send_command(struct drbd_conf *, struct drbd_socket *, extern int drbd_send_ping(struct drbd_tconn *tconn); extern int drbd_send_ping_ack(struct drbd_tconn *tconn); - -static inline int drbd_send_state_req(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) -{ - return _conn_send_state_req(mdev->tconn, mdev->vnr, P_STATE_CHG_REQ, mask, val); -} - -static inline int conn_send_state_req(struct drbd_tconn *tconn, - union drbd_state mask, union drbd_state val) -{ - enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? 
P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; - return _conn_send_state_req(tconn, 0, cmd, mask, val); -} +extern int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); +extern int conn_send_state_req(struct drbd_tconn *, union drbd_state, union drbd_state); static inline void drbd_thread_stop(struct drbd_thread *thi) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 606a9ecbe04..230622f1aae 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -717,8 +717,8 @@ static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int siz h->length = cpu_to_be32(size); } -static void _prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h, - enum drbd_packet cmd, int size) +static void prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h, + enum drbd_packet cmd, int size) { if (tconn->agreed_pro_version >= 95) prepare_header95(&h->h95, cmd, size); @@ -726,59 +726,6 @@ static void _prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header * prepare_header80(&h->h80, cmd, size); } -static void prepare_header(struct drbd_conf *mdev, struct p_header *h, - enum drbd_packet cmd, int size) -{ - _prepare_header(mdev->tconn, mdev->vnr, h, cmd, size); -} - -/* the appropriate socket mutex must be held already */ -int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, - enum drbd_packet cmd, struct p_header *h, size_t size, - unsigned msg_flags) -{ - int err; - - _prepare_header(tconn, vnr, h, cmd, size - sizeof(struct p_header)); - err = drbd_send_all(tconn, sock->socket, h, size, msg_flags); - if (err && !signal_pending(current)) - conn_warn(tconn, "short send %s size=%d\n", - cmdname(cmd), (int)size); - return err; -} - -/* don't pass the socket. we may only look at it - * when we hold the appropriate socket mutex. 
- */ -int conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct drbd_socket *sock, - enum drbd_packet cmd, struct p_header *h, size_t size) -{ - int err = -EIO; - - mutex_lock(&sock->mutex); - if (sock->socket) - err = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); - mutex_unlock(&sock->mutex); - return err; -} - -int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, - size_t size) -{ - struct p_header80 h; - int err; - - prepare_header80(&h, cmd, size); - err = drbd_get_data_sock(tconn); - if (!err) { - err = drbd_send_all(tconn, tconn->data.socket, &h, sizeof(h), 0); - if (!err) - err = drbd_send_all(tconn, tconn->data.socket, data, size, 0); - drbd_put_data_sock(tconn); - } - return err; -} - void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock) { mutex_lock(&sock->mutex); @@ -811,8 +758,8 @@ static int __send_command(struct drbd_tconn *tconn, int vnr, */ msg_flags = data ? MSG_MORE : 0; - _prepare_header(tconn, vnr, sock->sbuf, cmd, - header_size - sizeof(struct p_header) + size); + prepare_header(tconn, vnr, sock->sbuf, cmd, + header_size - sizeof(struct p_header) + size); err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size, msg_flags); if (data && !err) @@ -845,22 +792,36 @@ int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock, int drbd_send_ping(struct drbd_tconn *tconn) { - struct p_header h; - return conn_send_cmd(tconn, 0, &tconn->meta, P_PING, &h, sizeof(h)); + struct drbd_socket *sock; + + sock = &tconn->meta; + if (!conn_prepare_command(tconn, sock)) + return -EIO; + return conn_send_command(tconn, sock, P_PING, sizeof(struct p_header), NULL, 0); } int drbd_send_ping_ack(struct drbd_tconn *tconn) { - struct p_header h; - return conn_send_cmd(tconn, 0, &tconn->meta, P_PING_ACK, &h, sizeof(h)); + struct drbd_socket *sock; + + sock = &tconn->meta; + if (!conn_prepare_command(tconn, sock)) + return -EIO; + return conn_send_command(tconn, sock, P_PING_ACK, sizeof(struct 
p_header), NULL, 0); } int drbd_send_sync_param(struct drbd_conf *mdev) { - struct p_rs_param_95 *p; struct drbd_socket *sock; - int size, err; + struct p_rs_param_95 *p; + int size; const int apv = mdev->tconn->agreed_pro_version; + enum drbd_packet cmd; + + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) @@ -868,112 +829,98 @@ int drbd_send_sync_param(struct drbd_conf *mdev) : apv <= 94 ? sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); - mutex_lock(&mdev->tconn->data.mutex); - sock = &mdev->tconn->data; + cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; - if (likely(sock->socket != NULL)) { - enum drbd_packet cmd = - apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; + /* initialize verify_alg and csums_alg */ + memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - p = mdev->tconn->data.sbuf; + if (get_ldev(mdev)) { + p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate); + p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead); + p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target); + p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target); + p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate); + put_ldev(mdev); + } else { + p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); + p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); + p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); + p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); + } - /* initialize verify_alg and csums_alg */ - memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + if (apv >= 88) + strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg); + if (apv >= 89) + strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); - if (get_ldev(mdev)) { - p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate); - p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead); - p->c_delay_target = 
cpu_to_be32(mdev->ldev->dc.c_delay_target); - p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target); - p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate); - put_ldev(mdev); - } else { - p->rate = cpu_to_be32(DRBD_RATE_DEF); - p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); - p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); - p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); - p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); - } - - if (apv >= 88) - strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg); - if (apv >= 89) - strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); - - err = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); - } else - err = -EIO; - - mutex_unlock(&mdev->tconn->data.mutex); - - return err; + return drbd_send_command(mdev, sock, cmd, size, NULL, 0); } int drbd_send_protocol(struct drbd_tconn *tconn) { + struct drbd_socket *sock; struct p_protocol *p; - int size, cf, err; + int size, cf; - size = sizeof(struct p_protocol); + if (tconn->net_conf->dry_run && tconn->agreed_pro_version < 92) { + conn_err(tconn, "--dry-run is not supported by peer"); + return -EOPNOTSUPP; + } + sock = &tconn->data; + p = conn_prepare_command(tconn, sock); + if (!p) + return -EIO; + + size = sizeof(*p); if (tconn->agreed_pro_version >= 87) size += strlen(tconn->net_conf->integrity_alg) + 1; - /* we must not recurse into our own queue, - * as that is blocked during handshake */ - p = kmalloc(size, GFP_NOIO); - if (p == NULL) - return -ENOMEM; - p->protocol = cpu_to_be32(tconn->net_conf->wire_protocol); p->after_sb_0p = cpu_to_be32(tconn->net_conf->after_sb_0p); p->after_sb_1p = cpu_to_be32(tconn->net_conf->after_sb_1p); p->after_sb_2p = cpu_to_be32(tconn->net_conf->after_sb_2p); p->two_primaries = cpu_to_be32(tconn->net_conf->two_primaries); - cf = 0; if (tconn->net_conf->want_lose) cf |= CF_WANT_LOSE; - if (tconn->net_conf->dry_run) { - if (tconn->agreed_pro_version >= 92) - cf |= CF_DRY_RUN; - else { - conn_err(tconn, 
"--dry-run is not supported by peer"); - kfree(p); - return -EOPNOTSUPP; - } - } + if (tconn->net_conf->dry_run) + cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); if (tconn->agreed_pro_version >= 87) strcpy(p->integrity_alg, tconn->net_conf->integrity_alg); - - err = conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header)); - kfree(p); - return err; + return conn_send_command(tconn, sock, P_PROTOCOL, size, NULL, 0); } int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) { - struct p_uuids p; + struct drbd_socket *sock; + struct p_uuids *p; int i; if (!get_ldev_if_state(mdev, D_NEGOTIATING)) return 0; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) { + put_ldev(mdev); + return -EIO; + } for (i = UI_CURRENT; i < UI_SIZE; i++) - p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; + p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; mdev->comm_bm_set = drbd_bm_total_weight(mdev); - p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); + p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); uuid_flags |= mdev->tconn->net_conf->want_lose ? 1 : 0; uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 
4 : 0; - p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); + p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); put_ldev(mdev); - - return drbd_send_cmd(mdev, &mdev->tconn->data, P_UUIDS, &p.head, sizeof(p)); + return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -1006,7 +953,8 @@ void drbd_print_uuids(struct drbd_conf *mdev, const char *text) void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) { - struct p_rs_uuid p; + struct drbd_socket *sock; + struct p_rs_uuid *p; u64 uuid; D_ASSERT(mdev->state.disk == D_UP_TO_DATE); @@ -1015,14 +963,19 @@ void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) drbd_uuid_set(mdev, UI_BITMAP, uuid); drbd_print_uuids(mdev, "updated sync UUID"); drbd_md_sync(mdev); - p.uuid = cpu_to_be64(uuid); - drbd_send_cmd(mdev, &mdev->tconn->data, P_SYNC_UUID, &p.head, sizeof(p)); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (p) { + p->uuid = cpu_to_be64(uuid); + drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0); + } } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) { - struct p_sizes p; + struct drbd_socket *sock; + struct p_sizes *p; sector_t d_size, u_size; int q_order_type, max_bio_size; @@ -1041,14 +994,17 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ } - p.d_size = cpu_to_be64(d_size); - p.u_size = cpu_to_be64(u_size); - p.c_size = cpu_to_be64(trigger_reply ? 
0 : drbd_get_capacity(mdev->this_bdev)); - p.max_bio_size = cpu_to_be32(max_bio_size); - p.queue_order_type = cpu_to_be16(q_order_type); - p.dds_flags = cpu_to_be16(flags); - - return drbd_send_cmd(mdev, &mdev->tconn->data, P_SIZES, &p.head, sizeof(p)); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->d_size = cpu_to_be64(d_size); + p->u_size = cpu_to_be64(u_size); + p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); + p->max_bio_size = cpu_to_be32(max_bio_size); + p->queue_order_type = cpu_to_be16(q_order_type); + p->dds_flags = cpu_to_be16(flags); + return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0); } /** @@ -1058,50 +1014,72 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl int drbd_send_state(struct drbd_conf *mdev) { struct drbd_socket *sock; - struct p_state p; - int err = -EIO; + struct p_state *p; - mutex_lock(&mdev->tconn->data.mutex); - - p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ sock = &mdev->tconn->data; - - if (likely(sock->socket != NULL)) - err = _drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); - - mutex_unlock(&mdev->tconn->data.mutex); - - return err; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ + return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0); } -int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd, - union drbd_state mask, union drbd_state val) +int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) { - struct p_req_state p; + struct drbd_socket *sock; + struct p_req_state *p; - p.mask = cpu_to_be32(mask.i); - p.val = cpu_to_be32(val.i); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->mask = cpu_to_be32(mask.i); + p->val = cpu_to_be32(val.i); + return 
drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0); - return conn_send_cmd(tconn, vnr, &tconn->data, cmd, &p.head, sizeof(p)); +} + +int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) +{ + enum drbd_packet cmd; + struct drbd_socket *sock; + struct p_req_state *p; + + cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; + sock = &tconn->data; + p = conn_prepare_command(tconn, sock); + if (!p) + return -EIO; + p->mask = cpu_to_be32(mask.i); + p->val = cpu_to_be32(val.i); + return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0); } void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) { - struct p_req_state_reply p; + struct drbd_socket *sock; + struct p_req_state_reply *p; - p.retcode = cpu_to_be32(retcode); - - drbd_send_cmd(mdev, &mdev->tconn->meta, P_STATE_CHG_REPLY, &p.head, sizeof(p)); + sock = &mdev->tconn->meta; + p = drbd_prepare_command(mdev, sock); + if (p) { + p->retcode = cpu_to_be32(retcode); + drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0); + } } -int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) +void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) { - struct p_req_state_reply p; + struct drbd_socket *sock; + struct p_req_state_reply *p; enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? 
P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; - p.retcode = cpu_to_be32(retcode); - - return !conn_send_cmd(tconn, 0, &tconn->meta, cmd, &p.head, sizeof(p)); + sock = &tconn->meta; + p = conn_prepare_command(tconn, sock); + if (p) { + p->retcode = cpu_to_be32(retcode); + conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0); + } } static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) @@ -1224,21 +1202,20 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, static int send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) { - struct p_compressed_bm *p = mdev->tconn->data.sbuf; + struct drbd_socket *sock = &mdev->tconn->data; + struct p_compressed_bm *p = sock->sbuf; unsigned long num_words; int len, err; len = fill_bitmap_rle_bits(mdev, p, c); - if (len < 0) return -EIO; if (len) { dcbp_set_code(p, RLE_VLI_Bits); - err = _drbd_send_cmd(mdev, &mdev->tconn->data, - P_COMPRESSED_BITMAP, &p->head, - sizeof(*p) + len, 0); - + err = __send_command(mdev->tconn, mdev->vnr, sock, + P_COMPRESSED_BITMAP, sizeof(*p) + len, + NULL, 0); c->packets[0]++; c->bytes[0] += sizeof(*p) + len; @@ -1247,14 +1224,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) } else { /* was not compressible. * send a buffer full of plain text bits instead. 
*/ - struct p_header *h = mdev->tconn->data.sbuf; + struct p_header *h = sock->sbuf; num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); len = num_words * sizeof(long); if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long *)h->payload); - err = _drbd_send_cmd(mdev, &mdev->tconn->data, P_BITMAP, - h, sizeof(struct p_header80) + len, 0); + err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, + sizeof(*h) + len, NULL, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -1314,23 +1291,31 @@ static int _drbd_send_bitmap(struct drbd_conf *mdev) int drbd_send_bitmap(struct drbd_conf *mdev) { - int err; + struct drbd_socket *sock = &mdev->tconn->data; + int err = -1; - if (drbd_get_data_sock(mdev->tconn)) - return -1; - err = !_drbd_send_bitmap(mdev); - drbd_put_data_sock(mdev->tconn); + mutex_lock(&sock->mutex); + if (sock->socket) + err = !_drbd_send_bitmap(mdev); + mutex_unlock(&sock->mutex); return err; } + void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) { - struct p_barrier_ack p; + struct drbd_socket *sock; + struct p_barrier_ack *p; - p.barrier = barrier_nr; - p.set_size = cpu_to_be32(set_size); + if (mdev->state.conn < C_CONNECTED) + return; - if (mdev->state.conn >= C_CONNECTED) - drbd_send_cmd(mdev, &mdev->tconn->meta, P_BARRIER_ACK, &p.head, sizeof(p)); + sock = &mdev->tconn->meta; + p = drbd_prepare_command(mdev, sock); + if (!p) + return; + p->barrier = barrier_nr; + p->set_size = cpu_to_be32(set_size); + drbd_send_command(mdev, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); } /** @@ -1344,16 +1329,21 @@ void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, u64 sector, u32 blksize, u64 block_id) { - struct p_block_ack p; + struct drbd_socket *sock; + struct p_block_ack *p; - p.sector = sector; - p.block_id = block_id; - p.blksize = blksize; - p.seq_num = 
cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); - - if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) + if (mdev->state.conn < C_CONNECTED) return -EIO; - return drbd_send_cmd(mdev, &mdev->tconn->meta, cmd, &p.head, sizeof(p)); + + sock = &mdev->tconn->meta; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = sector; + p->block_id = block_id; + p->blksize = blksize; + p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); + return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); } /* dp->sector and dp->block_id already/still in network byte order, @@ -1403,43 +1393,51 @@ int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id) { - struct p_block_req p; + struct drbd_socket *sock; + struct p_block_req *p; - p.sector = cpu_to_be64(sector); - p.block_id = block_id; - p.blksize = cpu_to_be32(size); - - return drbd_send_cmd(mdev, &mdev->tconn->data, cmd, &p.head, sizeof(p)); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(sector); + p->block_id = block_id; + p->blksize = cpu_to_be32(size); + return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); } int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, void *digest, int digest_size, enum drbd_packet cmd) { - int err; - struct p_block_req p; + struct drbd_socket *sock; + struct p_block_req *p; - prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size); - p.sector = cpu_to_be64(sector); - p.block_id = ID_SYNCER /* unused */; - p.blksize = cpu_to_be32(size); + /* FIXME: Put the digest into the preallocated socket buffer. 
*/ - mutex_lock(&mdev->tconn->data.mutex); - err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), 0); - if (!err) - err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, digest, digest_size, 0); - mutex_unlock(&mdev->tconn->data.mutex); - return err; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(sector); + p->block_id = ID_SYNCER /* unused */; + p->blksize = cpu_to_be32(size); + return drbd_send_command(mdev, sock, cmd, sizeof(*p), + digest, digest_size); } int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) { - struct p_block_req p; + struct drbd_socket *sock; + struct p_block_req *p; - p.sector = cpu_to_be64(sector); - p.block_id = ID_SYNCER /* unused */; - p.blksize = cpu_to_be32(size); - - return drbd_send_cmd(mdev, &mdev->tconn->data, P_OV_REQUEST, &p.head, sizeof(p)); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(sector); + p->block_id = ID_SYNCER /* unused */; + p->blksize = cpu_to_be32(size); + return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0); } /* called on sndtimeo @@ -1632,39 +1630,30 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) */ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) { - int err; - struct p_data p; + struct drbd_socket *sock; + struct p_data *p; unsigned int dp_flags = 0; - void *dgb; int dgs; - - err = drbd_get_data_sock(mdev->tconn); - if (err) - return err; + int err; dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? 
crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - p.sector = cpu_to_be64(req->i.sector); - p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq)); - + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(req->i.sector); + p->block_id = (unsigned long)req; + p->seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); - if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T) dp_flags |= DP_MAY_SET_IN_SYNC; - - p.dp_flags = cpu_to_be32(dp_flags); - set_bit(UNPLUG_REMOTE, &mdev->flags); - err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, &p, - sizeof(p), dgs ? MSG_MORE : 0); - if (!err && dgs) { - dgb = mdev->tconn->int_dig_out; - drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb); - err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); - } + p->dp_flags = cpu_to_be32(dp_flags); + if (dgs) + drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, p + 1); + err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); if (!err) { /* For protocol A, we have to memcpy the payload into * socket buffers, as we may complete right away @@ -1688,7 +1677,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * currently supported in kernel crypto. 
*/ unsigned char digest[64]; drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest); - if (memcmp(mdev->tconn->int_dig_out, digest, dgs)) { + if (memcmp(p + 1, digest, dgs)) { dev_warn(DEV, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", (unsigned long long)req->i.sector, req->i.size); @@ -1697,8 +1686,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) ... Be noisy about digest too large ... } */ } - - drbd_put_data_sock(mdev->tconn); + mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ return err; } @@ -1710,51 +1698,43 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_peer_request *peer_req) { + struct drbd_socket *sock; + struct p_data *p; int err; - struct p_data p; - void *dgb; int dgs; dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - prepare_header(mdev, &p.head, cmd, sizeof(p) - - sizeof(struct p_header80) + - dgs + peer_req->i.size); - p.sector = cpu_to_be64(peer_req->i.sector); - p.block_id = peer_req->block_id; - p.seq_num = 0; /* unused */ - - /* Only called by our kernel thread. - * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL - * in response to admin command or module unload. - */ - err = drbd_get_data_sock(mdev->tconn); - if (err) - return err; - err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, &p, - sizeof(p), dgs ? 
MSG_MORE : 0); - if (!err && dgs) { - dgb = mdev->tconn->int_dig_out; - drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb); - err = drbd_send_all(mdev->tconn, mdev->tconn->data.socket, dgb, - dgs, 0); - } + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(peer_req->i.sector); + p->block_id = peer_req->block_id; + p->seq_num = 0; /* unused */ + if (dgs) + drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, p + 1); + err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size); if (!err) err = _drbd_send_zc_ee(mdev, peer_req); - drbd_put_data_sock(mdev->tconn); + mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ return err; } int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req) { - struct p_block_desc p; + struct drbd_socket *sock; + struct p_block_desc *p; - p.sector = cpu_to_be64(req->i.sector); - p.blksize = cpu_to_be32(req->i.size); - - return drbd_send_cmd(mdev, &mdev->tconn->data, P_OUT_OF_SYNC, &p.head, sizeof(p)); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(req->i.sector); + p->blksize = cpu_to_be32(req->i.size); + return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0); } /* diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e52a929d9ed..40fe7199d5f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -729,24 +729,32 @@ out: return s_estab; } -static int drbd_send_fp(struct drbd_tconn *tconn, struct drbd_socket *sock, enum drbd_packet cmd) -{ - struct p_header *h = tconn->data.sbuf; +static int decode_header(struct drbd_tconn *, struct p_header *, struct packet_info *); - return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); +static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock, + enum 
drbd_packet cmd) +{ + if (!conn_prepare_command(tconn, sock)) + return -EIO; + return conn_send_command(tconn, sock, cmd, sizeof(struct p_header), NULL, 0); } -static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock) +static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock) { - struct p_header80 h; - int rr; + unsigned int header_size = drbd_header_size(tconn); + struct packet_info pi; + int err; - rr = drbd_recv_short(sock, &h, sizeof(h), 0); - - if (rr == sizeof(h) && h.magic == cpu_to_be32(DRBD_MAGIC)) - return be16_to_cpu(h.command); - - return 0xffff; + err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0); + if (err != header_size) { + if (err >= 0) + err = -EIO; + return err; + } + err = decode_header(tconn, tconn->data.rbuf, &pi); + if (err) + return err; + return pi.cmd; } /** @@ -834,10 +842,10 @@ static int drbd_connect(struct drbd_tconn *tconn) if (s) { if (!tconn->data.socket) { tconn->data.socket = s; - drbd_send_fp(tconn, &tconn->data, P_INITIAL_DATA); + send_first_packet(tconn, &tconn->data, P_INITIAL_DATA); } else if (!tconn->meta.socket) { tconn->meta.socket = s; - drbd_send_fp(tconn, &tconn->meta, P_INITIAL_META); + send_first_packet(tconn, &tconn->meta, P_INITIAL_META); } else { conn_err(tconn, "Logic error in drbd_connect()\n"); goto out_release_sockets; @@ -855,7 +863,7 @@ static int drbd_connect(struct drbd_tconn *tconn) retry: s = drbd_wait_for_connect(tconn); if (s) { - try = drbd_recv_fp(tconn, s); + try = receive_first_packet(tconn, s); drbd_socket_okay(&tconn->data.socket); drbd_socket_okay(&tconn->meta.socket); switch (try) { @@ -1324,6 +1332,10 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { + /* + * FIXME: Receive the incoming digest into the receive buffer + * here, together with its struct p_data? 
+ */ err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); if (err) return NULL; @@ -4019,8 +4031,8 @@ static void drbdd(struct drbd_tconn *tconn) err = cmd->fn(tconn, &pi); if (err) { - conn_err(tconn, "error receiving %s, l: %d!\n", - cmdname(pi.cmd), pi.size); + conn_err(tconn, "error receiving %s, e: %d l: %d!\n", + cmdname(pi.cmd), err, pi.size); goto err_out; } } @@ -4179,27 +4191,17 @@ static int drbd_disconnected(int vnr, void *p, void *data) */ static int drbd_send_features(struct drbd_tconn *tconn) { - /* ASSERT current == mdev->tconn->receiver ... */ - struct p_connection_features *p = tconn->data.sbuf; - int err; + struct drbd_socket *sock; + struct p_connection_features *p; - if (mutex_lock_interruptible(&tconn->data.mutex)) { - conn_err(tconn, "interrupted during initial handshake\n"); - return -EINTR; - } - - if (tconn->data.socket == NULL) { - mutex_unlock(&tconn->data.mutex); + sock = &tconn->data; + p = conn_prepare_command(tconn, sock); + if (!p) return -EIO; - } - memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - err = _conn_send_cmd(tconn, 0, &tconn->data, P_CONNECTION_FEATURES, - &p->head, sizeof(*p), 0); - mutex_unlock(&tconn->data.mutex); - return err; + return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); } /* @@ -4283,6 +4285,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) static int drbd_do_auth(struct drbd_tconn *tconn) { + struct drbd_socket *sock; char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ struct scatterlist sg; char *response = NULL; @@ -4294,6 +4297,8 @@ static int drbd_do_auth(struct drbd_tconn *tconn) struct packet_info pi; int err, rv; + /* FIXME: Put the challenge/response into the preallocated socket buffer. 
*/ + desc.tfm = tconn->cram_hmac_tfm; desc.flags = 0; @@ -4307,7 +4312,14 @@ static int drbd_do_auth(struct drbd_tconn *tconn) get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = !conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + sock = &tconn->data; + if (!conn_prepare_command(tconn, sock)) { + rv = 0; + goto fail; + } + rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, + sizeof(struct p_header), + my_challenge, CHALLENGE_LEN); if (!rv) goto fail; @@ -4361,7 +4373,13 @@ static int drbd_do_auth(struct drbd_tconn *tconn) goto fail; } - rv = !conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size); + if (!conn_prepare_command(tconn, sock)) { + rv = 0; + goto fail; + } + rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, + sizeof(struct p_header), + response, resp_size); if (!rv) goto fail; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 7350466ff30..78b95e902aa 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1191,10 +1191,10 @@ int w_prev_work_done(struct drbd_work *w, int cancel) int w_send_barrier(struct drbd_work *w, int cancel) { + struct drbd_socket *sock; struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); struct drbd_conf *mdev = w->mdev; - struct p_barrier *p = mdev->tconn->data.sbuf; - int err = 0; + struct p_barrier *p; /* really avoid racing with tl_clear. w.cb may have been referenced * just before it was reassigned and re-queued, so double check that. @@ -1208,26 +1208,28 @@ int w_send_barrier(struct drbd_work *w, int cancel) if (cancel) return 0; - err = drbd_get_data_sock(mdev->tconn); - if (err) - return err; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; p->barrier = b->br_number; /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. 
*/ - err = _drbd_send_cmd(mdev, &mdev->tconn->data, P_BARRIER, - &p->head, sizeof(*p), 0); - drbd_put_data_sock(mdev->tconn); - - return err; + return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0); } int w_send_write_hint(struct drbd_work *w, int cancel) { struct drbd_conf *mdev = w->mdev; + struct drbd_socket *sock; + if (cancel) return 0; - return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); + sock = &mdev->tconn->data; + if (!drbd_prepare_command(mdev, sock)) + return -EIO; + return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, sizeof(struct p_header), NULL, 0); } int w_send_out_of_sync(struct drbd_work *w, int cancel) From da39fec49286d6b44bf441c9707dda2764b4498a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 31 Mar 2011 01:15:34 +0200 Subject: [PATCH 302/609] drbd: Remove now-unused int_dig_out buffer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 1 - drivers/block/drbd/drbd_nl.c | 9 --------- 3 files changed, 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 663f7b61175..ccc374cc7ea 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -893,7 +893,6 @@ struct drbd_tconn { /* is a resource from the config file */ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; - void *int_dig_out; void *int_dig_in; void *int_dig_vv; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 230622f1aae..e3dc84dcd67 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2404,7 +2404,6 @@ void drbd_free_tconn(struct drbd_tconn *tconn) drbd_free_socket(&tconn->meta); drbd_free_socket(&tconn->data); kfree(tconn->name); - kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); kfree(tconn->int_dig_vv); kfree(tconn); diff --git 
a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5cf116471aa..d9bb1a5c756 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1781,7 +1781,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; struct drbd_tconn *oconn; @@ -1955,11 +1954,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) /* allocation not in the IO path, cqueue thread context */ if (integrity_w_tfm) { i = crypto_hash_digestsize(integrity_w_tfm); - int_dig_out = kmalloc(i, GFP_KERNEL); - if (!int_dig_out) { - retcode = ERR_NOMEM; - goto fail; - } int_dig_in = kmalloc(i, GFP_KERNEL); if (!int_dig_in) { retcode = ERR_NOMEM; @@ -1990,10 +1984,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) crypto_free_hash(tconn->integrity_r_tfm); tconn->integrity_r_tfm = integrity_r_tfm; - kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); kfree(tconn->int_dig_vv); - tconn->int_dig_out=int_dig_out; tconn->int_dig_in=int_dig_in; tconn->int_dig_vv=int_dig_vv; retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); @@ -2009,7 +2001,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) return 0; fail: - kfree(int_dig_out); kfree(int_dig_in); kfree(int_dig_vv); crypto_free_hash(tfm); From 50d0b1ad78b99aa776c3ddf9b1d45163fff435b9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Mar 2011 11:53:51 +0200 Subject: [PATCH 303/609] drbd: Remove some fixed header size assumptions Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 15 +++------------ drivers/block/drbd/drbd_main.c | 25 +++++++++++++++---------- drivers/block/drbd/drbd_receiver.c | 29 +++++++++++++++++------------ 3 files changed, 35 insertions(+), 34 deletions(-) diff 
--git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ccc374cc7ea..cb16783e78d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -543,19 +543,10 @@ struct p_delay_probe93 { u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; -/* one bitmap packet, including the p_header, - * should fit within one _architecture independend_ page. - * so we need to use the fixed size 4KiB page size - * most architectures have used for a long time. +/* + * Bitmap packets need to fit within a single page on the sender and receiver, + * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger). */ -#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) -#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) -#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) -#if (PAGE_SIZE < 4096) -/* drbd_send_bitmap / receive_bitmap would break horribly */ -#error "PAGE_SIZE too small" -#endif - #define DRBD_SOCKET_BUFFER_SIZE 4096 /**********************************************************************/ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e3dc84dcd67..3ecbd4908cd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1100,8 +1100,9 @@ static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n) } int fill_bitmap_rle_bits(struct drbd_conf *mdev, - struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct p_compressed_bm *p, + unsigned int size, + struct bm_xfer_ctx *c) { struct bitstream bs; unsigned long plain_bits; @@ -1120,8 +1121,8 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, return 0; /* nothing to do. 
*/ /* use at most thus many bytes */ - bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0); - memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX); + bitstream_init(&bs, p->code, size, 0); + memset(p->code, 0, size); /* plain bits covered in this code string */ plain_bits = 0; @@ -1203,11 +1204,11 @@ static int send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) { struct drbd_socket *sock = &mdev->tconn->data; + unsigned int header_size = drbd_header_size(mdev->tconn); struct p_compressed_bm *p = sock->sbuf; - unsigned long num_words; int len, err; - len = fill_bitmap_rle_bits(mdev, p, c); + len = fill_bitmap_rle_bits(mdev, p, DRBD_SOCKET_BUFFER_SIZE - sizeof(*p) /* FIXME */, c); if (len < 0) return -EIO; @@ -1224,9 +1225,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) } else { /* was not compressible. * send a buffer full of plain text bits instead. */ + unsigned int data_size; + unsigned long num_words; struct p_header *h = sock->sbuf; - num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); - len = num_words * sizeof(long); + + data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; + num_words = min_t(size_t, data_size / sizeof(unsigned long), + c->bm_words - c->word_offset); + len = num_words * sizeof(unsigned long); if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long *)h->payload); @@ -1236,7 +1242,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) c->bit_offset = c->word_offset * BITS_PER_LONG; c->packets[1]++; - c->bytes[1] += sizeof(struct p_header80) + len; + c->bytes[1] += header_size + len; if (c->bit_offset > c->bm_bits) c->bit_offset = c->bm_bits; @@ -2550,7 +2556,6 @@ int __init drbd_init(void) { int err; - BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); BUILD_BUG_ON(sizeof(struct p_connection_features) != 80); if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { diff --git 
a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 40fe7199d5f..74ed3ac263f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3660,16 +3660,19 @@ static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi) * code upon failure. */ static int -receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, +receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size, struct p_header *h, struct bm_xfer_ctx *c) { unsigned long *buffer = (unsigned long *)h->payload; - unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); - unsigned want = num_words * sizeof(long); + unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - + drbd_header_size(mdev->tconn); + unsigned int num_words = min_t(size_t, data_size / sizeof(unsigned long), + c->bm_words - c->word_offset); + unsigned int want = num_words * sizeof(unsigned long); int err; - if (want != data_size) { - dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); + if (want != size) { + dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size); return -EIO; } if (want == 0) @@ -3796,11 +3799,13 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned plain = sizeof(struct p_header) * - ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) - + c->bm_words * sizeof(long); - unsigned total = c->bytes[0] + c->bytes[1]; - unsigned r; + unsigned int header_size = drbd_header_size(mdev->tconn); + unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; + unsigned int plain = + header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + + c->bm_words * sizeof(unsigned long); + unsigned int total = c->bytes[0] + c->bytes[1]; + unsigned int r; /* total can not be zero. 
but just in case: */ if (total == 0) @@ -3862,7 +3867,7 @@ static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi) * and the feature is enabled! */ struct p_compressed_bm *p; - if (pi->size > BM_PACKET_PAYLOAD_BYTES) { + if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) { dev_err(DEV, "ReportCBitmap packet too large\n"); err = -EIO; goto out; @@ -3885,7 +3890,7 @@ static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi) } c.packets[pi->cmd == P_BITMAP]++; - c.bytes[pi->cmd == P_BITMAP] += sizeof(struct p_header) + pi->size; + c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size; if (err <= 0) { if (err < 0) From e658983af6e62304be785cd6b0ae756723057395 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Mar 2011 12:54:42 +0200 Subject: [PATCH 304/609] drbd: Remove headers from on-the-wire data structures (struct p_*) Prepare the introduction of the protocol 100 headers. The actual protocol header is removed for the packet declarations. I.e. allow us to use the packets with different headers. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 45 -------- drivers/block/drbd/drbd_main.c | 45 ++++---- drivers/block/drbd/drbd_receiver.c | 172 +++++++++++++++-------------- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 110 insertions(+), 154 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index cb16783e78d..6d55bb75a08 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -307,32 +307,8 @@ struct p_header95 { u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. 
*/ } __packed; -struct p_header { - union { - struct p_header80 h80; - struct p_header95 h95; - }; - u8 payload[0]; -}; - extern unsigned int drbd_header_size(struct drbd_tconn *tconn); -/* - * short commands, packets without payload, plain p_header: - * P_PING - * P_PING_ACK - * P_BECOME_SYNC_TARGET - * P_BECOME_SYNC_SOURCE - * P_UNPLUG_REMOTE - */ - -/* - * commands with out-of-struct payload: - * P_BITMAP (no additional fields) - * P_DATA, P_DATA_REPLY (see p_data) - * P_COMPRESSED_BITMAP (see receive_compressed_bitmap) - */ - /* these defines must not be changed without changing the protocol version */ #define DP_HARDBARRIER 1 /* depricated */ #define DP_RW_SYNC 2 /* equals REQ_SYNC */ @@ -343,7 +319,6 @@ extern unsigned int drbd_header_size(struct drbd_tconn *tconn); #define DP_DISCARD 64 /* equals REQ_DISCARD */ struct p_data { - struct p_header head; u64 sector; /* 64 bits sector number */ u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; @@ -359,7 +334,6 @@ struct p_data { * P_DATA_REQUEST, P_RS_DATA_REQUEST */ struct p_block_ack { - struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -367,7 +341,6 @@ struct p_block_ack { } __packed; struct p_block_req { - struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -384,7 +357,6 @@ struct p_block_req { */ struct p_connection_features { - struct p_header head; /* Note: vnr will be ignored */ u32 protocol_min; u32 feature_flags; u32 protocol_max; @@ -396,22 +368,18 @@ struct p_connection_features { u32 _pad; u64 reserverd[7]; } __packed; -/* 80 bytes, FIXED for the next century */ struct p_barrier { - struct p_header head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ } __packed; struct p_barrier_ack { - struct p_header head; u32 barrier; u32 set_size; } __packed; struct p_rs_param { - struct p_header head; u32 rate; /* Since protocol version 88 and higher. 
*/ @@ -419,7 +387,6 @@ struct p_rs_param { } __packed; struct p_rs_param_89 { - struct p_header head; u32 rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; @@ -427,7 +394,6 @@ struct p_rs_param_89 { } __packed; struct p_rs_param_95 { - struct p_header head; u32 rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; @@ -443,7 +409,6 @@ enum drbd_conn_flags { }; struct p_protocol { - struct p_header head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; @@ -457,17 +422,14 @@ struct p_protocol { } __packed; struct p_uuids { - struct p_header head; u64 uuid[UI_EXTENDED_SIZE]; } __packed; struct p_rs_uuid { - struct p_header head; u64 uuid; } __packed; struct p_sizes { - struct p_header head; u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ @@ -477,18 +439,15 @@ struct p_sizes { } __packed; struct p_state { - struct p_header head; u32 state; } __packed; struct p_req_state { - struct p_header head; u32 mask; u32 val; } __packed; struct p_req_state_reply { - struct p_header head; u32 retcode; } __packed; @@ -503,14 +462,12 @@ struct p_drbd06_param { } __packed; struct p_discard { - struct p_header head; u64 block_id; u32 seq_num; u32 pad; } __packed; struct p_block_desc { - struct p_header head; u64 sector; u32 blksize; u32 pad; /* to multiple of 8 Byte */ @@ -526,7 +483,6 @@ enum drbd_bitmap_code { }; struct p_compressed_bm { - struct p_header head; /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code * (encoding & 0x80): polarity (set/unset) of first runlength * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits @@ -538,7 +494,6 @@ struct p_compressed_bm { } __packed; struct p_delay_probe93 { - struct p_header head; u32 seq_num; /* sequence number to match the two probe packets */ u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 
3ecbd4908cd..b9dcc50135c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -703,27 +703,29 @@ unsigned int drbd_header_size(struct drbd_tconn *tconn) return sizeof(struct p_header80); } -static void prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) +static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) { h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size); + return sizeof(struct p_header80); } -static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size) +static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size) { h->magic = cpu_to_be16(DRBD_MAGIC_BIG); h->command = cpu_to_be16(cmd); h->length = cpu_to_be32(size); + return sizeof(struct p_header95); } -static void prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h, - enum drbd_packet cmd, int size) +static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, void *buffer, + enum drbd_packet cmd, int size) { if (tconn->agreed_pro_version >= 95) - prepare_header95(&h->h95, cmd, size); + return prepare_header95(buffer, cmd, size); else - prepare_header80(&h->h80, cmd, size); + return prepare_header80(buffer, cmd, size); } void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock) @@ -733,7 +735,7 @@ void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock) mutex_unlock(&sock->mutex); return NULL; } - return sock->sbuf; + return sock->sbuf + drbd_header_size(tconn); } void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock) @@ -758,8 +760,8 @@ static int __send_command(struct drbd_tconn *tconn, int vnr, */ msg_flags = data ? 
MSG_MORE : 0; - prepare_header(tconn, vnr, sock->sbuf, cmd, - header_size - sizeof(struct p_header) + size); + header_size += prepare_header(tconn, vnr, sock->sbuf, cmd, + header_size + size); err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size, msg_flags); if (data && !err) @@ -797,7 +799,7 @@ int drbd_send_ping(struct drbd_tconn *tconn) sock = &tconn->meta; if (!conn_prepare_command(tconn, sock)) return -EIO; - return conn_send_command(tconn, sock, P_PING, sizeof(struct p_header), NULL, 0); + return conn_send_command(tconn, sock, P_PING, 0, NULL, 0); } int drbd_send_ping_ack(struct drbd_tconn *tconn) @@ -807,7 +809,7 @@ int drbd_send_ping_ack(struct drbd_tconn *tconn) sock = &tconn->meta; if (!conn_prepare_command(tconn, sock)) return -EIO; - return conn_send_command(tconn, sock, P_PING_ACK, sizeof(struct p_header), NULL, 0); + return conn_send_command(tconn, sock, P_PING_ACK, 0, NULL, 0); } int drbd_send_sync_param(struct drbd_conf *mdev) @@ -1205,10 +1207,11 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) { struct drbd_socket *sock = &mdev->tconn->data; unsigned int header_size = drbd_header_size(mdev->tconn); - struct p_compressed_bm *p = sock->sbuf; + struct p_compressed_bm *p = sock->sbuf + header_size; int len, err; - len = fill_bitmap_rle_bits(mdev, p, DRBD_SOCKET_BUFFER_SIZE - sizeof(*p) /* FIXME */, c); + len = fill_bitmap_rle_bits(mdev, p, + DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c); if (len < 0) return -EIO; @@ -1218,7 +1221,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) P_COMPRESSED_BITMAP, sizeof(*p) + len, NULL, 0); c->packets[0]++; - c->bytes[0] += sizeof(*p) + len; + c->bytes[0] += header_size + sizeof(*p) + len; if (c->bit_offset >= c->bm_bits) len = 0; /* DONE */ @@ -1227,17 +1230,15 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) * send a buffer full of plain text bits instead. 
*/ unsigned int data_size; unsigned long num_words; - struct p_header *h = sock->sbuf; + unsigned long *p = sock->sbuf + header_size; data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; - num_words = min_t(size_t, data_size / sizeof(unsigned long), + num_words = min_t(size_t, data_size / sizeof(*p), c->bm_words - c->word_offset); - len = num_words * sizeof(unsigned long); + len = num_words * sizeof(*p); if (len) - drbd_bm_get_lel(mdev, c->word_offset, num_words, - (unsigned long *)h->payload); - err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, - sizeof(*h) + len, NULL, 0); + drbd_bm_get_lel(mdev, c->word_offset, num_words, p); + err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, len, NULL, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -2556,8 +2557,6 @@ int __init drbd_init(void) { int err; - BUILD_BUG_ON(sizeof(struct p_connection_features) != 80); - if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR "drbd: invalid minor_count (%d)\n", minor_count); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 74ed3ac263f..7e0ab2246fb 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -52,6 +52,7 @@ struct packet_info { enum drbd_packet cmd; unsigned int size; unsigned int vnr; + void *data; }; enum finish_epoch { @@ -729,14 +730,14 @@ out: return s_estab; } -static int decode_header(struct drbd_tconn *, struct p_header *, struct packet_info *); +static int decode_header(struct drbd_tconn *, void *, struct packet_info *); static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock, enum drbd_packet cmd) { if (!conn_prepare_command(tconn, sock)) return -EIO; - return conn_send_command(tconn, sock, cmd, sizeof(struct p_header), NULL, 0); + return conn_send_command(tconn, sock, cmd, 0, NULL, 0); } static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock) 
@@ -978,36 +979,43 @@ out_release_sockets: return -1; } -static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi) +static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi) { - if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { - pi->cmd = be16_to_cpu(h->h80.command); - pi->size = be16_to_cpu(h->h80.length); - pi->vnr = 0; - } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { - pi->cmd = be16_to_cpu(h->h95.command); - pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff; + unsigned int header_size = drbd_header_size(tconn); + + if (header_size == sizeof(struct p_header95) && + *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { + struct p_header95 *h = header; + + pi->cmd = be16_to_cpu(h->command); + pi->size = be32_to_cpu(h->length) & 0x00ffffff; + pi->vnr = 0; + } else if (header_size == sizeof(struct p_header80) && + *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) { + struct p_header80 *h = header; + pi->cmd = be16_to_cpu(h->command); + pi->size = be16_to_cpu(h->length); pi->vnr = 0; } else { - conn_err(tconn, "magic?? 
on data m: 0x%08x c: %d l: %d\n", - be32_to_cpu(h->h80.magic), - be16_to_cpu(h->h80.command), - be16_to_cpu(h->h80.length)); + conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n", + be32_to_cpu(*(__be32 *)header), + tconn->agreed_pro_version); return -EINVAL; } + pi->data = header + header_size; return 0; } static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_header *h = tconn->data.rbuf; + void *buffer = tconn->data.rbuf; int err; - err = drbd_recv_all_warn(tconn, h, drbd_header_size(tconn)); + err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn)); if (err) return err; - err = decode_header(tconn, h, pi); + err = decode_header(tconn, buffer, pi); tconn->last_received = jiffies; return err; @@ -1242,7 +1250,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; int rv; - struct p_barrier *p = tconn->data.rbuf; + struct p_barrier *p = pi->data; struct drbd_epoch *epoch; mdev = vnr_to_mdev(tconn, pi->vnr); @@ -1560,7 +1568,7 @@ static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi) struct drbd_request *req; sector_t sector; int err; - struct p_data *p = tconn->data.rbuf; + struct p_data *p = pi->data; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -1592,7 +1600,7 @@ static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi) struct drbd_conf *mdev; sector_t sector; int err; - struct p_data *p = tconn->data.rbuf; + struct p_data *p = pi->data; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -1985,7 +1993,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) struct drbd_conf *mdev; sector_t sector; struct drbd_peer_request *peer_req; - struct p_data *p = tconn->data.rbuf; + struct p_data *p = pi->data; u32 peer_seq = be32_to_cpu(p->seq_num); int rw = WRITE; u32 dp_flags; @@ -2173,7 +2181,7 @@ static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) struct 
digest_info *di = NULL; int size, verb; unsigned int fault_type; - struct p_block_req *p = tconn->data.rbuf; + struct p_block_req *p = pi->data; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -2893,7 +2901,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_protocol *p = tconn->data.rbuf; + struct p_protocol *p = pi->data; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; @@ -3033,7 +3041,7 @@ static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *p static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_rs_param_95 *p = tconn->data.rbuf; + struct p_rs_param_95 *p; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; @@ -3059,22 +3067,23 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } if (apv <= 88) { - header_size = sizeof(struct p_rs_param) - sizeof(struct p_header); + header_size = sizeof(struct p_rs_param); data_size = pi->size - header_size; } else if (apv <= 94) { - header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header); + header_size = sizeof(struct p_rs_param_89); data_size = pi->size - header_size; D_ASSERT(data_size == 0); } else { - header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header); + header_size = sizeof(struct p_rs_param_95); data_size = pi->size - header_size; D_ASSERT(data_size == 0); } /* initialize verify_alg and csums_alg */ + p = pi->data; memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - err = drbd_recv_all(mdev->tconn, &p->head.payload, header_size); + err = drbd_recv_all(mdev->tconn, p, header_size); if (err) return err; @@ -3209,7 +3218,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, 
static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_sizes *p = tconn->data.rbuf; + struct p_sizes *p = pi->data; enum determine_dev_size dd = unchanged; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ @@ -3311,7 +3320,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_uuids *p = tconn->data.rbuf; + struct p_uuids *p = pi->data; u64 *p_uuid; int i, updated_uuids = 0; @@ -3411,7 +3420,7 @@ static union drbd_state convert_state(union drbd_state ps) static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_req_state *p = tconn->data.rbuf; + struct p_req_state *p = pi->data; union drbd_state mask, val; enum drbd_state_rv rv; @@ -3441,7 +3450,7 @@ static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi) static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_req_state *p = tconn->data.rbuf; + struct p_req_state *p = pi->data; union drbd_state mask, val; enum drbd_state_rv rv; @@ -3466,7 +3475,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info * static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_state *p = tconn->data.rbuf; + struct p_state *p = pi->data; union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags cs_flags; @@ -3623,7 +3632,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_rs_uuid *p = tconn->data.rbuf; + struct p_rs_uuid *p = pi->data; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -3661,14 +3670,13 @@ static int 
receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi) */ static int receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size, - struct p_header *h, struct bm_xfer_ctx *c) + unsigned long *p, struct bm_xfer_ctx *c) { - unsigned long *buffer = (unsigned long *)h->payload; unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(mdev->tconn); - unsigned int num_words = min_t(size_t, data_size / sizeof(unsigned long), + unsigned int num_words = min_t(size_t, data_size / sizeof(*p), c->bm_words - c->word_offset); - unsigned int want = num_words * sizeof(unsigned long); + unsigned int want = num_words * sizeof(*p); int err; if (want != size) { @@ -3677,11 +3685,11 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size, } if (want == 0) return 0; - err = drbd_recv_all(mdev->tconn, buffer, want); + err = drbd_recv_all(mdev->tconn, p, want); if (err) return err; - drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); + drbd_bm_merge_lel(mdev, c->word_offset, num_words, p); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -3784,7 +3792,7 @@ decode_bitmap_c(struct drbd_conf *mdev, unsigned int len) { if (dcbp_get_code(p) == RLE_VLI_Bits) - return recv_bm_rle_bits(mdev, p, c, len); + return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p)); /* other variants had been implemented for evaluation, * but have been dropped as this one turned out to be "best" @@ -3844,7 +3852,6 @@ static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi) struct drbd_conf *mdev; struct bm_xfer_ctx c; int err; - struct p_header *h = tconn->data.rbuf; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -3860,28 +3867,26 @@ static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi) }; for(;;) { - if (pi->cmd == P_BITMAP) { - err = receive_bitmap_plain(mdev, pi->size, h, &c); - } else if (pi->cmd == P_COMPRESSED_BITMAP) { + if (pi->cmd == P_BITMAP) + err = receive_bitmap_plain(mdev, pi->size, 
pi->data, &c); + else if (pi->cmd == P_COMPRESSED_BITMAP) { /* MAYBE: sanity check that we speak proto >= 90, * and the feature is enabled! */ - struct p_compressed_bm *p; + struct p_compressed_bm *p = pi->data; if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) { dev_err(DEV, "ReportCBitmap packet too large\n"); err = -EIO; goto out; } - - p = mdev->tconn->data.rbuf; - err = drbd_recv_all(mdev->tconn, p->head.payload, pi->size); - if (err) - goto out; - if (pi->size <= (sizeof(*p) - sizeof(p->head))) { + if (pi->size <= sizeof(*p)) { dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size); err = -EIO; goto out; } + err = drbd_recv_all(mdev->tconn, p, pi->size); + if (err) + goto out; err = decode_bitmap_c(mdev, p, &c, pi->size); } else { dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); @@ -3948,7 +3953,7 @@ static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_block_desc *p = tconn->data.rbuf; + struct p_block_desc *p = pi->data; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -3980,13 +3985,13 @@ static struct data_cmd drbd_cmd_handler[] = { [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , - [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , - [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , - [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote }, + [P_BITMAP] = { 1, 0, receive_bitmap } , + [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } , + [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote }, [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 
- [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam }, - [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam }, + [P_SYNC_PARAM] = { 1, 0, receive_SyncParam }, + [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam }, [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, @@ -4003,7 +4008,6 @@ static struct data_cmd drbd_cmd_handler[] = { static void drbdd(struct drbd_tconn *tconn) { - struct p_header *header = tconn->data.rbuf; struct packet_info pi; size_t shs; /* sub header size */ int err; @@ -4021,14 +4025,14 @@ static void drbdd(struct drbd_tconn *tconn) goto err_out; } - shs = cmd->pkt_size - sizeof(struct p_header); - if (pi.size - shs > 0 && !cmd->expect_payload) { + shs = cmd->pkt_size; + if (pi.size > shs && !cmd->expect_payload) { conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } if (shs) { - err = drbd_recv_all_warn(tconn, &header->payload, shs); + err = drbd_recv_all_warn(tconn, pi.data, shs); if (err) goto err_out; pi.size -= shs; @@ -4219,8 +4223,8 @@ static int drbd_send_features(struct drbd_tconn *tconn) static int drbd_do_features(struct drbd_tconn *tconn) { /* ASSERT current == tconn->receiver ... 
*/ - struct p_connection_features *p = tconn->data.rbuf; - const int expect = sizeof(struct p_connection_features) - sizeof(struct p_header80); + struct p_connection_features *p; + const int expect = sizeof(struct p_connection_features); struct packet_info pi; int err; @@ -4244,7 +4248,8 @@ static int drbd_do_features(struct drbd_tconn *tconn) return -1; } - err = drbd_recv_all_warn(tconn, &p->head.payload, expect); + p = pi.data; + err = drbd_recv_all_warn(tconn, p, expect); if (err) return 0; @@ -4322,8 +4327,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) rv = 0; goto fail; } - rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, - sizeof(struct p_header), + rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; @@ -4382,8 +4386,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) rv = 0; goto fail; } - rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, - sizeof(struct p_header), + rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0, response, resp_size); if (!rv) goto fail; @@ -4482,7 +4485,7 @@ int drbdd_init(struct drbd_thread *thi) static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_req_state_reply *p = tconn->meta.rbuf; + struct p_req_state_reply *p = pi->data; int retcode = be32_to_cpu(p->retcode); if (retcode >= SS_SUCCESS) { @@ -4500,7 +4503,7 @@ static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_req_state_reply *p = tconn->meta.rbuf; + struct p_req_state_reply *p = pi->data; int retcode = be32_to_cpu(p->retcode); mdev = vnr_to_mdev(tconn, pi->vnr); @@ -4538,7 +4541,7 @@ static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi) static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_block_ack *p = tconn->meta.rbuf; + struct p_block_ack *p = 
pi->data; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); @@ -4588,7 +4591,7 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_block_ack *p = tconn->meta.rbuf; + struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); enum drbd_req_event what; @@ -4638,7 +4641,7 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_block_ack *p = tconn->meta.rbuf; + struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); bool missing_ok = tconn->net_conf->wire_protocol == DRBD_PROT_A || @@ -4676,7 +4679,7 @@ static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_block_ack *p = tconn->meta.rbuf; + struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); mdev = vnr_to_mdev(tconn, pi->vnr); @@ -4698,7 +4701,7 @@ static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) struct drbd_conf *mdev; sector_t sector; int size; - struct p_block_ack *p = tconn->meta.rbuf; + struct p_block_ack *p = pi->data; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -4732,7 +4735,7 @@ static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_barrier_ack *p = tconn->meta.rbuf; + struct p_barrier_ack *p = pi->data; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -4753,7 +4756,7 @@ static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) static int got_OVResult(struct drbd_tconn 
*tconn, struct packet_info *pi) { struct drbd_conf *mdev; - struct p_block_ack *p = tconn->meta.rbuf; + struct p_block_ack *p = pi->data; struct drbd_work *w; sector_t sector; int size; @@ -4837,8 +4840,8 @@ struct asender_cmd { }; static struct asender_cmd asender_tbl[] = { - [P_PING] = { sizeof(struct p_header), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_PING] = { 0, got_Ping }, + [P_PING_ACK] = { 0, got_PingAck }, [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, @@ -4859,11 +4862,10 @@ static struct asender_cmd asender_tbl[] = { int drbd_asender(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; - struct p_header *h = tconn->meta.rbuf; struct asender_cmd *cmd = NULL; struct packet_info pi; int rv; - void *buf = h; + void *buf = tconn->meta.rbuf; int received = 0; unsigned int header_size = drbd_header_size(tconn); int expect = header_size; @@ -4941,7 +4943,7 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (decode_header(tconn, h, &pi)) + if (decode_header(tconn, tconn->meta.rbuf, &pi)) goto reconnect; cmd = &asender_tbl[pi.cmd]; if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) { @@ -4949,7 +4951,7 @@ int drbd_asender(struct drbd_thread *thi) pi.cmd, pi.size); goto disconnect; } - expect = cmd->pkt_size; + expect = header_size + cmd->pkt_size; if (pi.size != expect - header_size) { conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n", pi.cmd, pi.size); @@ -4972,7 +4974,7 @@ int drbd_asender(struct drbd_thread *thi) if (cmd == &asender_tbl[P_PING_ACK]) ping_timeout_active = 0; - buf = h; + buf = tconn->meta.rbuf; received = 0; expect = header_size; cmd = NULL; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 78b95e902aa..086a4b6439a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ 
b/drivers/block/drbd/drbd_worker.c @@ -1229,7 +1229,7 @@ int w_send_write_hint(struct drbd_work *w, int cancel) sock = &mdev->tconn->data; if (!drbd_prepare_command(mdev, sock)) return -EIO; - return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, sizeof(struct p_header), NULL, 0); + return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0); } int w_send_out_of_sync(struct drbd_work *w, int cancel) From 0c8e36d9b843be56e4e43d4ef3c3eb6a97205599 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Mar 2011 16:00:17 +0200 Subject: [PATCH 305/609] drbd: Introduce protocol version 100 headers The 8 byte header finally becomes too small. With the protocol 100 header we have 16 bit for the volume number, proper 32 bit for the data length, and 32 bit for further extensions in the future. Previous versions of drbd are using version 80 headers for all packets short enough for protocol 80. They support both header versions in worker context, but only version 80 headers in asynchronous context. For backwards compatibility, continue to use version 80 headers for short packets before protocol version 100. From protocol version 100 on, use the same header version for all packets. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++++++ drivers/block/drbd/drbd_main.c | 32 ++++++++++++++++++++++++------ drivers/block/drbd/drbd_nl.c | 3 +-- drivers/block/drbd/drbd_receiver.c | 14 +++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 2 -- include/linux/drbd_limits.h | 2 ++ 7 files changed, 50 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6d55bb75a08..bf1aad68338 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -307,6 +307,14 @@ struct p_header95 { u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. 
*/ } __packed; +struct p_header100 { + u32 magic; + u16 volume; + u16 command; + u32 length; + u32 pad; +} __packed; + extern unsigned int drbd_header_size(struct drbd_tconn *tconn); /* these defines must not be changed without changing the protocol version */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b9dcc50135c..5d9112cefcd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -698,9 +698,15 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) */ unsigned int drbd_header_size(struct drbd_tconn *tconn) { - BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); - BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); - return sizeof(struct p_header80); + if (tconn->agreed_pro_version >= 100) { + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8)); + return sizeof(struct p_header100); + } else { + BUILD_BUG_ON(sizeof(struct p_header80) != + sizeof(struct p_header95)); + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); + return sizeof(struct p_header80); + } } static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) @@ -719,10 +725,24 @@ static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, return sizeof(struct p_header95); } -static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, void *buffer, - enum drbd_packet cmd, int size) +static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd, + int size, int vnr) { - if (tconn->agreed_pro_version >= 95) + h->magic = cpu_to_be32(DRBD_MAGIC_100); + h->volume = cpu_to_be16(vnr); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); + h->pad = 0; + return sizeof(struct p_header100); +} + +static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, + void *buffer, enum drbd_packet cmd, int size) +{ + if (tconn->agreed_pro_version >= 100) + return prepare_header100(buffer, cmd, size, vnr); + else if 
(tconn->agreed_pro_version >= 95 && + size > DRBD_MAX_SIZE_H80_PACKET) return prepare_header95(buffer, cmd, size); else return prepare_header80(buffer, cmd, size); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d9bb1a5c756..0f52b88719c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2833,8 +2833,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) retcode = ERR_INVALID_REQUEST; goto out; } - /* FIXME we need a define here */ - if (adm_ctx.volume >= 256) { + if (adm_ctx.volume > DRBD_VOLUME_MAX) { drbd_msg_put_info("requested volume id out of range"); retcode = ERR_INVALID_REQUEST; goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7e0ab2246fb..311b95453cb 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -983,8 +983,18 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i { unsigned int header_size = drbd_header_size(tconn); - if (header_size == sizeof(struct p_header95) && - *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { + if (header_size == sizeof(struct p_header100) && + *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { + struct p_header100 *h = header; + if (h->pad != 0) { + conn_err(tconn, "Header padding is not zero\n"); + return -EINVAL; + } + pi->vnr = be16_to_cpu(h->volume); + pi->cmd = be16_to_cpu(h->command); + pi->size = be32_to_cpu(h->length); + } else if (header_size == sizeof(struct p_header95) && + *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { struct p_header95 *h = header; pi->cmd = be16_to_cpu(h->command); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 60d30881909..fe8d6ba31bc 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -341,6 +341,7 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +#define DRBD_MAGIC_100 0x8620ec20 /* how I came up with this magic? 
* base64 decode "actlog==" ;) */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 938e8560a83..10144d546a6 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -95,8 +95,6 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource. */ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - /* currently only 256 volumes per group, - * but maybe we still change that */ __u32_field(1, GENLA_F_MANDATORY, ctx_volume) __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) ) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 659a8eb3883..7f5149bef70 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -19,6 +19,8 @@ #define DRBD_MINOR_COUNT_MAX 256 #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_VOLUME_MAX 65535 + #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 From b55d84ba17e90491ac2046583327d4756159efd6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 22 Mar 2011 13:17:47 +0100 Subject: [PATCH 306/609] drbd: Removed outdated comments and code that envisioned VNRs in header 95 Since we now have header 100, which has space for 16 bit volume numbers, the high byte of the length in header 95 is no longer reserved for 8 bit volume numbers. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index bf1aad68338..43e9490fabe 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -304,7 +304,7 @@ struct p_header80 { struct p_header95 { u16 magic; /* use DRBD_MAGIC_BIG here */ u16 command; - u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. 
*/ + u32 length; } __packed; struct p_header100 { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5d9112cefcd..e1aef12e509 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -721,7 +721,7 @@ static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, { h->magic = cpu_to_be16(DRBD_MAGIC_BIG); h->command = cpu_to_be16(cmd); - h->length = cpu_to_be32(size); + h->length = cpu_to_be32(size); return sizeof(struct p_header95); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 311b95453cb..f846e55c174 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -996,10 +996,9 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i } else if (header_size == sizeof(struct p_header95) && *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { struct p_header95 *h = header; - pi->cmd = be16_to_cpu(h->command); - pi->size = be32_to_cpu(h->length) & 0x00ffffff; - pi->vnr = 0; + pi->size = be32_to_cpu(h->length); + pi->vnr = 0; } else if (header_size == sizeof(struct p_header80) && *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) { struct p_header80 *h = header; From 859976758dc6f33ed76c21365bb49a4af9e2dd59 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 4 Apr 2011 13:09:15 +0200 Subject: [PATCH 307/609] drbd: validate_req_change_req_state(): Return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f846e55c174..cf816b79278 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4587,14 +4587,14 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, req = 
find_request(mdev, root, id, sector, missing_ok, func); if (unlikely(!req)) { spin_unlock_irq(&mdev->tconn->req_lock); - return false; + return -EIO; } __req_mod(req, what, &m); spin_unlock_irq(&mdev->tconn->req_lock); if (m.bio) complete_master_bio(mdev, &m); - return true; + return 0; } static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4642,9 +4642,9 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) return false; } - return validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->write_requests, __func__, - what, false); + return !validate_req_change_req_state(mdev, p->block_id, sector, + &mdev->write_requests, __func__, + what, false); } static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4655,7 +4655,7 @@ static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) int size = be32_to_cpu(p->blksize); bool missing_ok = tconn->net_conf->wire_protocol == DRBD_PROT_A || tconn->net_conf->wire_protocol == DRBD_PROT_B; - bool found; + int err; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -4669,10 +4669,10 @@ static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) return true; } - found = validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->write_requests, __func__, - NEG_ACKED, missing_ok); - if (!found) { + err = validate_req_change_req_state(mdev, p->block_id, sector, + &mdev->write_requests, __func__, + NEG_ACKED, missing_ok); + if (err) { /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. The master bio might already be completed, therefore the request is no longer in the collision hash. 
*/ @@ -4700,9 +4700,9 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); - return validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->read_requests, __func__, - NEG_ACKED, false); + return !validate_req_change_req_state(mdev, p->block_id, sector, + &mdev->read_requests, __func__, + NEG_ACKED, false); } static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) From 2735a59467ad84b14bb43185702c0bab352126c0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 4 Apr 2011 15:30:24 +0200 Subject: [PATCH 308/609] drbd: Make all asynchronous command handlers return 0 upon success and an error code otherwise Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 69 ++++++++++++++---------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cf816b79278..b2782b0c7d8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4506,7 +4506,7 @@ static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) } wake_up(&tconn->ping_wait); - return true; + return 0; } static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4517,7 +4517,7 @@ static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; if (retcode >= SS_SUCCESS) { set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); @@ -4528,12 +4528,12 @@ static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) } wake_up(&mdev->state_wait); - return true; + return 0; } static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi) { - return !drbd_send_ping_ack(tconn); + return drbd_send_ping_ack(tconn); } @@ -4544,7 
+4544,7 @@ static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi) if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags)) wake_up(&tconn->ping_wait); - return true; + return 0; } static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4556,7 +4556,7 @@ static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; D_ASSERT(mdev->tconn->agreed_pro_version >= 89); @@ -4572,7 +4572,7 @@ static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi) dec_rs_pending(mdev); atomic_add(blksize >> 9, &mdev->rs_sect_in); - return true; + return 0; } static int @@ -4607,14 +4607,14 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { drbd_set_in_sync(mdev, sector, blksize); dec_rs_pending(mdev); - return true; + return 0; } switch (pi->cmd) { case P_RS_WRITE_ACK: @@ -4638,13 +4638,12 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) what = POSTPONE_WRITE; break; default: - D_ASSERT(0); - return false; + BUG(); } - return !validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->write_requests, __func__, - what, false); + return validate_req_change_req_state(mdev, p->block_id, sector, + &mdev->write_requests, __func__, + what, false); } static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4659,14 +4658,14 @@ static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { dec_rs_pending(mdev); drbd_rs_failed_io(mdev, sector, size); - return true; + return 0; } err = validate_req_change_req_state(mdev, p->block_id, sector, @@ -4679,10 +4678,10 @@ 
static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) /* In Protocol B we might already have got a P_RECV_ACK but then get a P_NEG_ACK afterwards. */ if (!missing_ok) - return false; + return err; drbd_set_out_of_sync(mdev, sector, size); } - return true; + return 0; } static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4693,16 +4692,16 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); - return !validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->read_requests, __func__, - NEG_ACKED, false); + return validate_req_change_req_state(mdev, p->block_id, sector, + &mdev->read_requests, __func__, + NEG_ACKED, false); } static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4714,7 +4713,7 @@ static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4731,14 +4730,12 @@ static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) case P_RS_CANCEL: break; default: - D_ASSERT(0); - put_ldev(mdev); - return false; + BUG(); } put_ldev(mdev); } - return true; + return 0; } static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4748,7 +4745,7 @@ static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size)); @@ -4759,7 +4756,7 @@ static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) add_timer(&mdev->start_resync_timer); } - return true; + 
return 0; } static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) @@ -4772,7 +4769,7 @@ static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) - return false; + return -EIO; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4785,7 +4782,7 @@ static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) ov_out_of_sync_print(mdev); if (!get_ldev(mdev)) - return true; + return 0; drbd_rs_complete_io(mdev, sector); dec_rs_pending(mdev); @@ -4809,12 +4806,12 @@ static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) } } put_ldev(mdev); - return true; + return 0; } static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi) { - return true; + return 0; } static int tconn_process_done_ee(struct drbd_tconn *tconn) @@ -4968,10 +4965,10 @@ int drbd_asender(struct drbd_thread *thi) } } if (received == expect) { - bool rv; + bool err; - rv = cmd->fn(tconn, &pi); - if (!rv) { + err = cmd->fn(tconn, &pi); + if (err) { conn_err(tconn, "%pf failed\n", cmd->fn); goto reconnect; } From e0ab6ad4bcd0cbe6c6cfe90836352c8daf078509 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Apr 2011 17:18:29 +0200 Subject: [PATCH 309/609] drbd: drbd_init_ee() no longer exists Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ---- drivers/block/drbd/drbd_receiver.c | 1 - 2 files changed, 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e1aef12e509..4f451b978cf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2031,10 +2031,6 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) drbd_free_resources(mdev); clear_bit(AL_SUSPENDED, &mdev->flags); - /* - * currently we drbd_init_ee only on module load, so - * we may do drbd_release_ee only on module unload! 
- */ D_ASSERT(list_empty(&mdev->active_ee)); D_ASSERT(list_empty(&mdev->sync_ee)); D_ASSERT(list_empty(&mdev->done_ee)); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b2782b0c7d8..2c157257a03 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -309,7 +309,6 @@ You need to hold the req_lock: You must not have the req_lock: drbd_free_ee() drbd_alloc_ee() - drbd_init_ee() drbd_release_ee() drbd_ee_fix_bhs() drbd_process_done_ee() From 0db55363cb1e6cfe2bedecb7e47c05f8992c612e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Apr 2011 16:09:15 +0200 Subject: [PATCH 310/609] drbd: Rename drbd_alloc_ee() to drbd_alloc_peer_req() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_receiver.c | 12 ++++++------ drivers/block/drbd/drbd_worker.c | 3 ++- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 43e9490fabe..ff7182dde0d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1456,9 +1456,9 @@ extern int drbd_submit_peer_request(struct drbd_conf *, struct drbd_peer_request *, const unsigned, const int); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); -extern struct drbd_peer_request *drbd_alloc_ee(struct drbd_conf *, - u64, sector_t, unsigned int, - gfp_t) __must_hold(local); +extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_conf *, u64, + sector_t, unsigned int, + gfp_t) __must_hold(local); extern void drbd_free_some_ee(struct drbd_conf *, struct drbd_peer_request *, int); #define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2c157257a03..9e56010a107 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -308,7 +308,7 @@ 
You need to hold the req_lock: You must not have the req_lock: drbd_free_ee() - drbd_alloc_ee() + drbd_alloc_peer_req() drbd_release_ee() drbd_ee_fix_bhs() drbd_process_done_ee() @@ -317,8 +317,8 @@ You must not have the req_lock: */ struct drbd_peer_request * -drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, - unsigned int data_size, gfp_t gfp_mask) __must_hold(local) +drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, + unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { struct drbd_peer_request *peer_req; struct page *page; @@ -330,7 +330,7 @@ drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); if (!peer_req) { if (!(gfp_mask & __GFP_NOWARN)) - dev_err(DEV, "alloc_ee: Allocation of an EE failed\n"); + dev_err(DEV, "%s: allocation failed\n", __func__); return NULL; } @@ -1379,7 +1379,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ - peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); + peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO); if (!peer_req) return NULL; @@ -2240,7 +2240,7 @@ static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. 
*/ - peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); + peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO); if (!peer_req) { put_ldev(mdev); return -ENOMEM; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 086a4b6439a..396f0d019ea 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -354,7 +354,8 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. */ - peer_req = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); + peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector, + size, GFP_TRY); if (!peer_req) goto defer; From 3967deb192e147328e1a6085a443ea6afef54dbb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Apr 2011 16:16:56 +0200 Subject: [PATCH 311/609] drbd: Rename drbd_free_ee() and variants to *_peer_req() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++---- drivers/block/drbd/drbd_receiver.c | 22 +++++++++++----------- drivers/block/drbd/drbd_worker.c | 22 +++++++++++----------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ff7182dde0d..0e9cce0fe2b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1459,10 +1459,10 @@ extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_conf *, u64, sector_t, unsigned int, gfp_t) __must_hold(local); -extern void drbd_free_some_ee(struct drbd_conf *, struct drbd_peer_request *, - int); -#define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0) -#define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1) +extern void __drbd_free_peer_req(struct drbd_conf 
*, struct drbd_peer_request *, + int); +#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) +#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9e56010a107..672f493b7c9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -223,7 +223,7 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_ee(mdev, peer_req); + drbd_free_net_peer_req(mdev, peer_req); } /** @@ -307,7 +307,7 @@ You need to hold the req_lock: _drbd_wait_ee_list_empty() You must not have the req_lock: - drbd_free_ee() + drbd_free_peer_req() drbd_alloc_peer_req() drbd_release_ee() drbd_ee_fix_bhs() @@ -362,7 +362,7 @@ drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, return NULL; } -void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, +void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, int is_net) { if (peer_req->flags & EE_HAS_DIGEST) @@ -385,7 +385,7 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(peer_req, t, &work_list, w.list) { - drbd_free_some_ee(mdev, peer_req, is_net); + __drbd_free_peer_req(mdev, peer_req, is_net); count++; } return count; @@ -412,7 +412,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_ee(mdev, peer_req); + drbd_free_net_peer_req(mdev, peer_req); /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_discard_write. 
@@ -425,7 +425,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) err2 = peer_req->w.cb(&peer_req->w, !!err); if (!err) err = err2; - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); } wake_up(&mdev->ee_wait); @@ -1395,7 +1395,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, } kunmap(page); if (err) { - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); return NULL; } ds -= len; @@ -1406,7 +1406,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); return NULL; } } @@ -1547,7 +1547,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); fail: put_ldev(mdev); return -EIO; @@ -2109,7 +2109,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) out_interrupted: drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP); put_ldev(mdev); - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); return err; } @@ -2364,7 +2364,7 @@ submit: out_free_e: put_ldev(mdev); - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); return -EIO; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 396f0d019ea..befbb56443b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -319,7 +319,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. 
*/ - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); err = drbd_send_drequest_csum(mdev, sector, size, @@ -333,7 +333,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) out: if (peer_req) - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); if (unlikely(err)) dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); @@ -376,7 +376,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); defer: put_ldev(mdev); return -EAGAIN; @@ -900,7 +900,7 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_requ spin_unlock_irq(&mdev->tconn->req_lock); wake_up(&drbd_pp_wait); } else - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); } /** @@ -916,7 +916,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); return 0; } @@ -953,7 +953,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); return 0; } @@ -1005,7 +1005,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) int err, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); return 0; } @@ -1088,7 +1088,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. 
*/ - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY); @@ -1098,7 +1098,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) out: if (peer_req) - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); return err; } @@ -1126,7 +1126,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) int err, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); return 0; } @@ -1157,7 +1157,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. */ - drbd_free_ee(mdev, peer_req); + drbd_free_peer_req(mdev, peer_req); if (!eq) drbd_ov_out_of_sync_found(mdev, sector, size); else From 7721f5675e1d76278223d8b33575a59526508d80 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Apr 2011 17:14:02 +0200 Subject: [PATCH 312/609] drbd: Rename drbd_release_ee() to drbd_free_peer_reqs() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 14 +++++++------- drivers/block/drbd/drbd_receiver.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0e9cce0fe2b..9d22a47de7a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1455,7 +1455,7 @@ extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); extern int drbd_submit_peer_request(struct drbd_conf *, struct drbd_peer_request *, const unsigned, const int); -extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); +extern int drbd_free_peer_reqs(struct drbd_conf *, struct list_head *); extern 
struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_conf *, u64, sector_t, unsigned int, gfp_t) __must_hold(local); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4f451b978cf..7c63b4d473f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2178,27 +2178,27 @@ static struct notifier_block drbd_notifier = { .notifier_call = drbd_notify_sys, }; -static void drbd_release_ee_lists(struct drbd_conf *mdev) +static void drbd_release_all_peer_reqs(struct drbd_conf *mdev) { int rr; - rr = drbd_release_ee(mdev, &mdev->active_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->active_ee); if (rr) dev_err(DEV, "%d EEs in active list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->sync_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->sync_ee); if (rr) dev_err(DEV, "%d EEs in sync list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->read_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->read_ee); if (rr) dev_err(DEV, "%d EEs in read list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->done_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->done_ee); if (rr) dev_err(DEV, "%d EEs in done list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->net_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->net_ee); if (rr) dev_err(DEV, "%d EEs in net list found!\n", rr); } @@ -2230,7 +2230,7 @@ void drbd_delete_device(unsigned int minor) drbd_free_resources(mdev); - drbd_release_ee_lists(mdev); + drbd_release_all_peer_reqs(mdev); lc_destroy(mdev->act_log); lc_destroy(mdev->resync); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 672f493b7c9..6a511665d3c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -309,7 +309,7 @@ You need to hold the req_lock: You must not have the req_lock: drbd_free_peer_req() drbd_alloc_peer_req() - drbd_release_ee() + drbd_free_peer_reqs() drbd_ee_fix_bhs() drbd_process_done_ee() drbd_clear_done_ee() @@ -373,7 +373,7 @@ 
void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer mempool_free(peer_req, drbd_ee_mempool); } -int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) +int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list) { LIST_HEAD(work_list); struct drbd_peer_request *peer_req, *t; @@ -4175,7 +4175,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) * Actually we don't care for exactly when the network stack does its * put_page(), but release our reference on these pages right here. */ - i = drbd_release_ee(mdev, &mdev->net_ee); + i = drbd_free_peer_reqs(mdev, &mdev->net_ee); if (i) dev_info(DEV, "net_ee not empty, killed %u entries\n", i); i = atomic_read(&mdev->pp_in_use_by_net); From a990be46827945664624b54741e050e1df2face3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Apr 2011 17:56:48 +0200 Subject: [PATCH 313/609] drbd: Rename reclaim_net_ee(), drbd_process_done_ee(), drbd_process_done_ee(), tconn_process_done_ee() to *_peer_reqs Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 42 ++++++++++++++---------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6a511665d3c..370dc7f390f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -195,7 +195,8 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int return NULL; } -static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) +static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev, + struct list_head *to_be_freed) { struct drbd_peer_request *peer_req; struct list_head *le, *tle; @@ -219,7 +220,7 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) struct drbd_peer_request *peer_req, *t; spin_lock_irq(&mdev->tconn->req_lock); - reclaim_net_ee(mdev, &reclaimed); + 
reclaim_finished_net_peer_reqs(mdev, &reclaimed); spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) @@ -311,7 +312,7 @@ You must not have the req_lock: drbd_alloc_peer_req() drbd_free_peer_reqs() drbd_ee_fix_bhs() - drbd_process_done_ee() + drbd_finish_peer_reqs() drbd_clear_done_ee() drbd_wait_ee_list_empty() */ @@ -391,15 +392,10 @@ int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list) return count; } - -/* See also comments in _req_mod(,BARRIER_ACKED) - * and receive_Barrier. - * - * Move entries from net_ee to done_ee, if ready. - * Grab done_ee, call all callbacks, free the entries. - * The callbacks typically send out ACKs. +/* + * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. */ -static int drbd_process_done_ee(struct drbd_conf *mdev) +static int drbd_finish_peer_reqs(struct drbd_conf *mdev) { LIST_HEAD(work_list); LIST_HEAD(reclaimed); @@ -407,7 +403,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) int err = 0; spin_lock_irq(&mdev->tconn->req_lock); - reclaim_net_ee(mdev, &reclaimed); + reclaim_finished_net_peer_reqs(mdev, &reclaimed); list_splice_init(&mdev->done_ee, &work_list); spin_unlock_irq(&mdev->tconn->req_lock); @@ -1491,8 +1487,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, return 0; } -/* e_end_resync_block() is called via - * drbd_process_done_ee() by asender only */ +/* + * e_end_resync_block() is called in asender context via + * drbd_finish_peer_reqs(). + */ static int e_end_resync_block(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = @@ -1681,8 +1679,8 @@ static void restart_conflicting_writes(struct drbd_conf *mdev, } } -/* e_end_block() is called via drbd_process_done_ee(). - * this means this function only runs in the asender thread +/* + * e_end_block() is called in asender context via drbd_finish_peer_reqs(). 
*/ static int e_end_block(struct drbd_work *w, int cancel) { @@ -4146,9 +4144,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) * to be "canceled" */ drbd_flush_workqueue(mdev); - /* This also does reclaim_net_ee(). If we do this too early, we might - * miss some resync ee and pages.*/ - drbd_process_done_ee(mdev); + drbd_finish_peer_reqs(mdev); kfree(mdev->p_uuid); mdev->p_uuid = NULL; @@ -4813,7 +4809,7 @@ static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi) return 0; } -static int tconn_process_done_ee(struct drbd_tconn *tconn) +static int tconn_finish_peer_reqs(struct drbd_tconn *tconn) { struct drbd_conf *mdev; int i, not_empty = 0; @@ -4822,7 +4818,7 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) clear_bit(SIGNAL_ASENDER, &tconn->flags); flush_signals(current); idr_for_each_entry(&tconn->volumes, mdev, i) { - if (drbd_process_done_ee(mdev)) + if (drbd_finish_peer_reqs(mdev)) return 1; /* error */ } set_bit(SIGNAL_ASENDER, &tconn->flags); @@ -4895,8 +4891,8 @@ int drbd_asender(struct drbd_thread *thi) much to send */ if (!tconn->net_conf->no_cork) drbd_tcp_cork(tconn->meta.socket); - if (tconn_process_done_ee(tconn)) { - conn_err(tconn, "tconn_process_done_ee() failed\n"); + if (tconn_finish_peer_reqs(tconn)) { + conn_err(tconn, "tconn_finish_peer_reqs() failed\n"); goto reconnect; } /* but unconditionally uncork unless disabled */ From 045417f75c718a4ac97fd44106b8aafcbca5a6da Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Apr 2011 21:34:24 +0200 Subject: [PATCH 314/609] drbd: Rename drbd_{ ee -> peer_req }_has_active_page Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9d22a47de7a..bad5d92f83c 100644 --- a/drivers/block/drbd/drbd_int.h 
+++ b/drivers/block/drbd/drbd_int.h @@ -1594,7 +1594,7 @@ static inline int drbd_bio_has_active_page(struct bio *bio) return 0; } -static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) +static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req) { struct page *page = peer_req->pages; page_chain_for_each(page) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 370dc7f390f..10b9276b28f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -208,7 +208,7 @@ static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev, list_for_each_safe(le, tle, &mdev->net_ee) { peer_req = list_entry(le, struct drbd_peer_request, w.list); - if (drbd_ee_has_active_page(peer_req)) + if (drbd_peer_req_has_active_page(peer_req)) break; list_move(le, to_be_freed); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index befbb56443b..1fc1c9e930e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -890,7 +890,7 @@ out: /* helper */ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) { - if (drbd_ee_has_active_page(peer_req)) { + if (drbd_peer_req_has_active_page(peer_req)) { /* This might happen if sendpage() has not finished */ int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; atomic_add(i, &mdev->pp_in_use_by_net); From d4da15374ba52009801d4c37c6dae0ba6ec0fca5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Apr 2011 00:06:56 +0200 Subject: [PATCH 315/609] drbd: Make drbd_wait_ee_list_empty() and _drbd_wait_ee_list_empty() static Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ---- drivers/block/drbd/drbd_receiver.c | 6 ++++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index bad5d92f83c..ab938290e35 
100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1463,10 +1463,6 @@ extern void __drbd_free_peer_req(struct drbd_conf *, struct drbd_peer_request *, int); #define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) #define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) -extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, - struct list_head *head); -extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, - struct list_head *head); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); extern void conn_flush_workqueue(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 10b9276b28f..3f68404a17e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -428,7 +428,8 @@ static int drbd_finish_peer_reqs(struct drbd_conf *mdev) return err; } -void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head) { DEFINE_WAIT(wait); @@ -443,7 +444,8 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) } } -void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +static void drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head) { spin_lock_irq(&mdev->tconn->req_lock); _drbd_wait_ee_list_empty(mdev, head); From 18c2d52249fff156ff2db2e18cefe5184078af72 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Apr 2011 21:08:50 +0200 Subject: [PATCH 316/609] drbd: Rename drbd_pp_first_pages_or_try_alloc() to __drbd_alloc_pages() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c index 3f68404a17e..68914a0d353 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -150,11 +150,12 @@ static void page_chain_add(struct page **head, *head = chain_first; } -static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number) +static struct page *__drbd_alloc_pages(struct drbd_conf *mdev, + unsigned int number) { struct page *page = NULL; struct page *tmp = NULL; - int i = 0; + unsigned int i = 0; /* Yes, testing drbd_pp_vacant outside the lock is racy. * So what. It saves a spin_lock. */ @@ -247,7 +248,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool /* Yes, we may run up to @number over max_buffers. If we * follow it strictly, the admin will get it wrong anyways. */ if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) - page = drbd_pp_first_pages_or_try_alloc(mdev, number); + page = __drbd_alloc_pages(mdev, number); while (page == NULL) { prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); @@ -255,7 +256,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool drbd_kick_lo_and_reclaim_net(mdev); if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) { - page = drbd_pp_first_pages_or_try_alloc(mdev, number); + page = __drbd_alloc_pages(mdev, number); if (page) break; } From c37c8ecfee685fa42de8fd418ad8ca1e66408bd8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Apr 2011 21:02:09 +0200 Subject: [PATCH 317/609] drbd: Rename drbd_pp_alloc() to drbd_alloc_pages() and make it non-static Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 15 ++++++++------- drivers/block/drbd/drbd_worker.c | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ab938290e35..9aca1f40e08 
100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1463,6 +1463,7 @@ extern void __drbd_free_peer_req(struct drbd_conf *, struct drbd_peer_request *, int); #define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) #define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) +extern struct page *drbd_alloc_pages(struct drbd_conf *, unsigned int, bool); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); extern void conn_flush_workqueue(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 68914a0d353..74995e24dc1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -184,7 +184,7 @@ static struct page *__drbd_alloc_pages(struct drbd_conf *mdev, return page; /* Not enough pages immediately available this time. - * No need to jump around here, drbd_pp_alloc will retry this + * No need to jump around here, drbd_alloc_pages will retry this * function "soon". */ if (page) { tmp = page_chain_tail(page, NULL); @@ -229,7 +229,7 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) } /** - * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled) + * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) * @mdev: DRBD device. * @number: number of pages requested * @retry: whether to retry, if not enough pages are available right now @@ -240,7 +240,8 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) * * Returns a page chain linked via page->private. 
*/ -static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry) +struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, + bool retry) { struct page *page = NULL; DEFINE_WAIT(wait); @@ -265,7 +266,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool break; if (signal_pending(current)) { - dev_warn(DEV, "drbd_pp_alloc interrupted!\n"); + dev_warn(DEV, "drbd_alloc_pages interrupted!\n"); break; } @@ -278,7 +279,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool return page; } -/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. +/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages. * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ @@ -336,7 +337,7 @@ drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, return NULL; } - page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); + page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); if (!page) goto fail; @@ -1425,7 +1426,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) if (!data_size) return 0; - page = drbd_pp_alloc(mdev, 1, 1); + page = drbd_alloc_pages(mdev, 1, 1); data = kmap(page); while (data_size) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1fc1c9e930e..ef35bbd5408 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -318,7 +318,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in - * drbd_pp_alloc due to pp_in_use > max_buffers. */ + * drbd_alloc_pages due to pp_in_use > max_buffers. 
*/ drbd_free_peer_req(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); @@ -1087,7 +1087,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in - * drbd_pp_alloc due to pp_in_use > max_buffers. */ + * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(mdev, peer_req); peer_req = NULL; inc_rs_pending(mdev); @@ -1156,7 +1156,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in - * drbd_pp_alloc due to pp_in_use > max_buffers. */ + * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(mdev, peer_req); if (!eq) drbd_ov_out_of_sync_found(mdev, sector, size); From 5cc287e0ae77bef615a046a44e17e1d68e23dbb7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Apr 2011 21:02:59 +0200 Subject: [PATCH 318/609] drbd: Rename drbd_pp_free() to drbd_free_pages() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 74995e24dc1..7e635b24eef 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -283,7 +283,7 @@ struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ -static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) +static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net) { atomic_t *a = is_net ? 
&mdev->pp_in_use_by_net : &mdev->pp_in_use; int i; @@ -370,7 +370,7 @@ void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer { if (peer_req->flags & EE_HAS_DIGEST) kfree(peer_req->digest); - drbd_pp_free(mdev, peer_req->pages, is_net); + drbd_free_pages(mdev, peer_req->pages, is_net); D_ASSERT(atomic_read(&peer_req->pending_bios) == 0); D_ASSERT(drbd_interval_empty(&peer_req->i)); mempool_free(peer_req, drbd_ee_mempool); @@ -1438,7 +1438,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) data_size -= len; } kunmap(page); - drbd_pp_free(mdev, page, 0); + drbd_free_pages(mdev, page, 0); return err; } From ff370e5a9ebaf9499431216a531a196d98283228 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 11 Apr 2011 21:10:11 -0700 Subject: [PATCH 319/609] drbd: drbd_delete_device() takes a struct drbd_conf * now Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 12 ++++-------- drivers/block/drbd/drbd_nl.c | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9aca1f40e08..ce56aa08314 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1367,7 +1367,7 @@ extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_free_mdev(struct drbd_conf *mdev); -extern void drbd_delete_device(unsigned int minor); +extern void drbd_delete_device(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(const char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7c63b4d473f..93a16db8a99 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2204,15 +2204,10 @@ static void 
drbd_release_all_peer_reqs(struct drbd_conf *mdev) } /* caution. no locking. */ -void drbd_delete_device(unsigned int minor) +void drbd_delete_device(struct drbd_conf *mdev) { - struct drbd_conf *mdev = minor_to_mdev(minor); - - if (!mdev) - return; - idr_remove(&mdev->tconn->volumes, mdev->vnr); - idr_remove(&minors, minor); + idr_remove(&minors, mdev_to_minor(mdev)); synchronize_rcu(); /* paranoia asserts */ @@ -2265,7 +2260,8 @@ static void drbd_cleanup(void) drbd_genl_unregister(); idr_for_each_entry(&minors, mdev, i) - drbd_delete_device(i); + drbd_delete_device(mdev); + drbd_destroy_mempools(); unregister_blkdev(DRBD_MAJOR, "drbd"); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0f52b88719c..f08fb6f49cd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2861,7 +2861,7 @@ static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) * we may want to delete a minor from a live replication group. */ mdev->state.role == R_SECONDARY) { - drbd_delete_device(mdev_to_minor(mdev)); + drbd_delete_device(mdev); return NO_ERROR; } else return ERR_MINOR_CONFIGURED; From cd1d9950f69b46d88002b39652ed0cf3608d008b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 11 Apr 2011 21:24:24 -0700 Subject: [PATCH 320/609] drbd: Inlined drbd_free_mdev(); it got called only from one place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 25 +++++++------------------ 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ce56aa08314..c49dc085d93 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1366,7 +1366,6 @@ extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); -extern void drbd_free_mdev(struct 
drbd_conf *mdev); extern void drbd_delete_device(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(const char *name); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 93a16db8a99..563427bfc27 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2233,10 +2233,13 @@ void drbd_delete_device(struct drbd_conf *mdev) kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ - /* cleanup the rest that has been - * allocated from drbd_new_device - * and actually free the mdev itself */ - drbd_free_mdev(mdev); + kfree(mdev->current_epoch); + if (mdev->bitmap) /* should no longer be there. */ + drbd_bm_cleanup(mdev); + __free_page(mdev->md_io_page); + put_disk(mdev->vdisk); + blk_cleanup_queue(mdev->rq_queue); + kfree(mdev); } static void drbd_cleanup(void) @@ -2551,20 +2554,6 @@ out_no_q: return err; } -/* counterpart of drbd_new_device. - * last part of drbd_delete_device. */ -void drbd_free_mdev(struct drbd_conf *mdev) -{ - kfree(mdev->current_epoch); - if (mdev->bitmap) /* should no longer be there. 
*/ - drbd_bm_cleanup(mdev); - __free_page(mdev->md_io_page); - put_disk(mdev->vdisk); - blk_cleanup_queue(mdev->rq_queue); - kfree(mdev); -} - - int __init drbd_init(void) { int err; From 695d08fa94ce5bb8d9880e260445fbcf50fa41b4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 11 Apr 2011 22:53:32 -0700 Subject: [PATCH 321/609] drbd: rcu_read_[un]lock() for all idr accesses that do not sleep Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 12 ++++++--- drivers/block/drbd/drbd_nl.c | 41 +++++++++++++++++++++--------- drivers/block/drbd/drbd_receiver.c | 2 ++ drivers/block/drbd/drbd_state.c | 34 ++++++++++++++++++++++--- drivers/block/drbd/drbd_worker.c | 4 +++ 5 files changed, 74 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 563427bfc27..5abbdaf0466 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -453,8 +453,10 @@ void tl_clear(struct drbd_tconn *tconn) } /* ensure bit indicating barrier is required is clear */ + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) clear_bit(CREATE_BARRIER, &mdev->flags); + rcu_read_unlock(); spin_unlock_irq(&tconn->req_lock); } @@ -634,13 +636,15 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas int conn_lowest_minor(struct drbd_tconn *tconn) { - int vnr = 0; struct drbd_conf *mdev; + int vnr = 0, m; + rcu_read_lock(); mdev = idr_get_next(&tconn->volumes, &vnr); - if (!mdev) - return -1; - return mdev_to_minor(mdev); + m = mdev ? 
mdev_to_minor(mdev) : -1; + rcu_read_unlock(); + + return m; } #ifdef CONFIG_SMP diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f08fb6f49cd..60c171b1715 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -372,12 +372,14 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (get_ldev_if_state(mdev, D_CONSISTENT)) { fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing); put_ldev(mdev); } } + rcu_read_unlock(); return fp; } @@ -1624,29 +1626,41 @@ out: static bool conn_resync_running(struct drbd_tconn *tconn) { struct drbd_conf *mdev; + bool rv = false; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (mdev->state.conn == C_SYNC_SOURCE || mdev->state.conn == C_SYNC_TARGET || mdev->state.conn == C_PAUSED_SYNC_S || - mdev->state.conn == C_PAUSED_SYNC_T) - return true; + mdev->state.conn == C_PAUSED_SYNC_T) { + rv = true; + break; + } } - return false; + rcu_read_unlock(); + + return rv; } static bool conn_ov_running(struct drbd_tconn *tconn) { struct drbd_conf *mdev; + bool rv = false; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) - return true; + mdev->state.conn == C_VERIFY_T) { + rv = true; + break; + } } - return false; + rcu_read_unlock(); + + return rv; } int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) @@ -1858,26 +1872,28 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, i) { if (get_ldev(mdev)) { enum drbd_fencing_p fp = mdev->ldev->dc.fencing; put_ldev(mdev); if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { retcode = ERR_STONITH_AND_PROT_A; - goto fail; + goto fail_rcu_unlock; } } if (mdev->state.role == 
R_PRIMARY && new_conf->want_lose) { retcode = ERR_DISCARD; - goto fail; + goto fail_rcu_unlock; } if (!mdev->bitmap) { if(drbd_bm_init(mdev)) { retcode = ERR_NOMEM; - goto fail; + goto fail_rcu_unlock; } } } + rcu_read_unlock(); if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { retcode = ERR_CONG_NOT_PROTO_A; @@ -1991,15 +2007,19 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); spin_unlock_irq(&tconn->req_lock); + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, i) { mdev->send_cnt = 0; mdev->recv_cnt = 0; kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); } + rcu_read_unlock(); conn_reconfig_done(tconn); drbd_adm_finish(info, retcode); return 0; +fail_rcu_unlock: + rcu_read_unlock(); fail: kfree(int_dig_in); kfree(int_dig_vv); @@ -2562,8 +2582,6 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) /* synchronize with drbd_new_tconn/drbd_free_tconn */ mutex_lock(&drbd_cfg_mutex); - /* synchronize with drbd_delete_device */ - rcu_read_lock(); next_tconn: /* revalidate iterator position */ list_for_each_entry(tmp, &drbd_tconns, all_tconn) { @@ -2624,7 +2642,6 @@ next_tconn: } out: - rcu_read_unlock(); mutex_unlock(&drbd_cfg_mutex); /* where to start the next iteration */ cb->args[0] = (long)pos; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7e635b24eef..2b0b0ab90f2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4828,11 +4828,13 @@ static int tconn_finish_peer_reqs(struct drbd_tconn *tconn) set_bit(SIGNAL_ASENDER, &tconn->flags); spin_lock_irq(&tconn->req_lock); + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, i) { not_empty = !list_empty(&mdev->done_ee); if (not_empty) break; } + rcu_read_unlock(); spin_unlock_irq(&tconn->req_lock); } while (not_empty); diff --git 
a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c479577923c..52ebd9a9b03 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -55,15 +55,21 @@ static inline bool is_susp(union drbd_state s) bool conn_all_vols_unconf(struct drbd_tconn *tconn) { struct drbd_conf *mdev; + bool rv = true; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (mdev->state.disk != D_DISKLESS || mdev->state.conn != C_STANDALONE || - mdev->state.role != R_SECONDARY) - return false; + mdev->state.role != R_SECONDARY) { + rv = false; + break; + } } - return true; + rcu_read_unlock(); + + return rv; } /* Unfortunately the states where not correctly ordered, when @@ -91,8 +97,10 @@ enum drbd_role conn_highest_role(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) role = max_role(role, mdev->state.role); + rcu_read_unlock(); return role; } @@ -103,8 +111,10 @@ enum drbd_role conn_highest_peer(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) peer = max_role(peer, mdev->state.peer); + rcu_read_unlock(); return peer; } @@ -115,8 +125,10 @@ enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) ds = max_t(enum drbd_disk_state, ds, mdev->state.disk); + rcu_read_unlock(); return ds; } @@ -127,8 +139,10 @@ enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) ds = min_t(enum drbd_disk_state, ds, mdev->state.disk); + rcu_read_unlock(); return ds; } @@ -139,8 +153,10 @@ enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) ds = max_t(enum 
drbd_disk_state, ds, mdev->state.pdsk); + rcu_read_unlock(); return ds; } @@ -151,8 +167,10 @@ enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) conn = min_t(enum drbd_conns, conn, mdev->state.conn); + rcu_read_unlock(); return conn; } @@ -1406,12 +1424,14 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) /* case1: The outdate peer handler is successful: */ if (ns_max.pdsk <= D_OUTDATED) { tl_clear(tconn); + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } } + rcu_read_unlock(); conn_request_state(tconn, (union drbd_state) { { .susp_fen = 1 } }, (union drbd_state) { { .susp_fen = 0 } }, @@ -1419,8 +1439,10 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) } /* case2: The connection was established again: */ if (ns_min.conn >= C_CONNECTED) { + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) clear_bit(NEW_CUR_UUID, &mdev->flags); + rcu_read_unlock(); spin_lock_irq(&tconn->req_lock); _tl_restart(tconn, RESEND); _conn_request_state(tconn, @@ -1445,6 +1467,7 @@ void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum struct drbd_conf *mdev; int vnr, first_vol = 1; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { os = mdev->state; @@ -1469,6 +1492,7 @@ void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum if (cs.pdsk != os.pdsk) flags &= ~CS_DC_PDSK; } + rcu_read_unlock(); *pf |= CS_DC_MASK; *pf &= flags; @@ -1484,6 +1508,7 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union struct drbd_conf *mdev; int vnr; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { os = drbd_read_state(mdev); ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); @@ -1509,6 +1534,7 
@@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union if (rv < SS_SUCCESS) break; } + rcu_read_unlock(); if (rv < SS_SUCCESS && flags & CS_VERBOSE) print_st_err(mdev, os, ns, rv); @@ -1534,6 +1560,7 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (mask.conn == C_MASK) tconn->cstate = val.conn; + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { os = drbd_read_state(mdev); ns = apply_mask_val(os, mask, val); @@ -1559,6 +1586,7 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk); ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk); } + rcu_read_unlock(); ns_min.susp = ns_max.susp = tconn->susp; ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ef35bbd5408..410900eb2ff 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1348,6 +1348,7 @@ static int _drbd_pause_after(struct drbd_conf *mdev) struct drbd_conf *odev; int i, rv = 0; + rcu_read_lock(); idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; @@ -1355,6 +1356,7 @@ static int _drbd_pause_after(struct drbd_conf *mdev) rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) != SS_NOTHING_TO_DO); } + rcu_read_unlock(); return rv; } @@ -1370,6 +1372,7 @@ static int _drbd_resume_next(struct drbd_conf *mdev) struct drbd_conf *odev; int i, rv = 0; + rcu_read_lock(); idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; @@ -1380,6 +1383,7 @@ static int _drbd_resume_next(struct drbd_conf *mdev) != SS_NOTHING_TO_DO) ; } } + rcu_read_unlock(); return rv; } From ef356262846eb49821db7b20a131b6573e4c7d2e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Apr 2011 
14:21:29 -0700 Subject: [PATCH 322/609] drbd: Converted drbd_cfg_mutex into drbd_cfg_rwsem Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +++- drivers/block/drbd/drbd_main.c | 10 +++++----- drivers/block/drbd/drbd_nl.c | 27 ++++++++++++++++----------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c49dc085d93..7896a648d4a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -171,7 +171,9 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { extern struct ratelimit_state drbd_ratelimit_state; extern struct idr minors; extern struct list_head drbd_tconns; -extern struct mutex drbd_cfg_mutex; +extern struct rw_semaphore drbd_cfg_rwsem; +/* drbd_cfg_rwsem protects: drbd_tconns list, + note: non sleeping iterations over the idrs are protoected by RCU */ /* on the wire */ enum drbd_packet { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5abbdaf0466..86fd4c82900 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -120,7 +120,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 */ struct idr minors; struct list_head drbd_tconns; /* list of struct drbd_tconn */ -DEFINE_MUTEX(drbd_cfg_mutex); +DECLARE_RWSEM(drbd_cfg_rwsem); struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* peer requests */ @@ -2330,14 +2330,14 @@ struct drbd_tconn *conn_by_name(const char *name) if (!name || !name[0]) return NULL; - mutex_lock(&drbd_cfg_mutex); + down_read(&drbd_cfg_rwsem); list_for_each_entry(tconn, &drbd_tconns, all_tconn) { if (!strcmp(tconn->name, name)) goto found; } tconn = NULL; found: - mutex_unlock(&drbd_cfg_mutex); + up_read(&drbd_cfg_rwsem); return tconn; } @@ -2404,9 +2404,9 @@ struct drbd_tconn *drbd_new_tconn(const char *name) DRBD_ON_NO_DATA_DEF, /* on_no_data */ }; - 
mutex_lock(&drbd_cfg_mutex); + down_write(&drbd_cfg_rwsem); list_add_tail(&tconn->all_tconn, &drbd_tconns); - mutex_unlock(&drbd_cfg_mutex); + up_write(&drbd_cfg_rwsem); return tconn; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 60c171b1715..424dcb30ee1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1905,7 +1905,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) new_my_addr = (struct sockaddr *)&new_conf->my_addr; new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - /* No need to take drbd_cfg_mutex here. All reconfiguration is + /* No need to take drbd_cfg_rwsem here. All reconfiguration is * strictly serialized on genl_lock(). We are protected against * concurrent reconfiguration/addition/deletion */ list_for_each_entry(oconn, &drbd_tconns, all_tconn) { @@ -2581,7 +2581,7 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) */ /* synchronize with drbd_new_tconn/drbd_free_tconn */ - mutex_lock(&drbd_cfg_mutex); + down_read(&drbd_cfg_rwsem); next_tconn: /* revalidate iterator position */ list_for_each_entry(tmp, &drbd_tconns, all_tconn) { @@ -2642,7 +2642,7 @@ next_tconn: } out: - mutex_unlock(&drbd_cfg_mutex); + up_read(&drbd_cfg_rwsem); /* where to start the next iteration */ cb->args[0] = (long)pos; cb->args[1] = (pos == tconn) ? 
volume + 1 : 0; @@ -2894,9 +2894,9 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&drbd_cfg_mutex); + down_write(&drbd_cfg_rwsem); retcode = adm_delete_minor(adm_ctx.mdev); - mutex_unlock(&drbd_cfg_mutex); + up_write(&drbd_cfg_rwsem); /* if this was the last volume of this connection, * this will terminate all threads */ if (retcode == NO_ERROR) @@ -2924,7 +2924,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out; } - mutex_lock(&drbd_cfg_mutex); + down_read(&drbd_cfg_rwsem); /* demote */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { retcode = drbd_set_role(mdev, R_SECONDARY, 0); @@ -2951,14 +2951,17 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } } + up_read(&drbd_cfg_rwsem); /* delete volumes */ + down_write(&drbd_cfg_rwsem); idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { retcode = adm_delete_minor(mdev); if (retcode != NO_ERROR) { /* "can not happen" */ drbd_msg_put_info("failed to delete volume"); - goto out_unlock; + up_write(&drbd_cfg_rwsem); + goto out; } } @@ -2973,10 +2976,12 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* "can not happen" */ retcode = ERR_CONN_IN_USE; drbd_msg_put_info("failed to delete connection"); - goto out_unlock; } + + up_write(&drbd_cfg_rwsem); + goto out; out_unlock: - mutex_unlock(&drbd_cfg_mutex); + up_read(&drbd_cfg_rwsem); out: drbd_adm_finish(info, retcode); return 0; @@ -2992,14 +2997,14 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&drbd_cfg_mutex); + down_write(&drbd_cfg_rwsem); if (conn_lowest_minor(adm_ctx.tconn) < 0) { drbd_free_tconn(adm_ctx.tconn); retcode = NO_ERROR; } else { retcode = ERR_CONN_IN_USE; } - mutex_unlock(&drbd_cfg_mutex); + up_write(&drbd_cfg_rwsem); out: drbd_adm_finish(info, retcode); From d3fcb4908d8cce7f29cff16bbef3b08933148003 Mon Sep 17 00:00:00 
2001 From: Philipp Reisner Date: Wed, 13 Apr 2011 14:46:05 -0700 Subject: [PATCH 323/609] drbd: protect all idr accesses that might sleep with drbd_cfg_rwsem With this commit the locking for all accesses to IDRs is complete: * Non sleeping read accesses are protected by RCU * sleeping read accesses are protected by a read lock on drbd_cfg_rwsem * accesses that add anything are protected by a write lock * accesses that remove an object are protected by a write lock and a call to synchronize_rcu() after it is removed from the IDR and before the object is actually free()ed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 ++ drivers/block/drbd/drbd_nl.c | 4 ++++ drivers/block/drbd/drbd_proc.c | 2 ++ drivers/block/drbd/drbd_receiver.c | 13 +++++++++++-- drivers/block/drbd/drbd_worker.c | 2 ++ 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7896a648d4a..2119d9b02eb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -172,7 +172,7 @@ extern struct ratelimit_state drbd_ratelimit_state; extern struct idr minors; extern struct list_head drbd_tconns; extern struct rw_semaphore drbd_cfg_rwsem; -/* drbd_cfg_rwsem protects: drbd_tconns list, +/* drbd_cfg_rwsem protects: drbd_tconns list, minors idr, tconn->volumes idr note: non sleeping iterations over the idrs are protoected by RCU */ /* on the wire */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 86fd4c82900..f298f9c2dbd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2266,8 +2266,10 @@ static void drbd_cleanup(void) drbd_genl_unregister(); + down_write(&drbd_cfg_rwsem); idr_for_each_entry(&minors, mdev, i) drbd_delete_device(mdev); + up_write(&drbd_cfg_rwsem); drbd_destroy_mempools(); unregister_blkdev(DRBD_MAJOR, "drbd"); diff --git
a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 424dcb30ee1..dbaffcaf8e1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -328,8 +328,10 @@ static void conn_md_sync(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; + down_read(&drbd_cfg_rwsem); idr_for_each_entry(&tconn->volumes, mdev, vnr) drbd_md_sync(mdev); + up_read(&drbd_cfg_rwsem); } int conn_khelper(struct drbd_tconn *tconn, char *cmd) @@ -2865,7 +2867,9 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) goto out; } + down_write(&drbd_cfg_rwsem); retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); + up_write(&drbd_cfg_rwsem); out: drbd_adm_finish(info, retcode); return 0; diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index a4dbdbc52c1..4025d0883ba 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -227,6 +227,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) oos .. 
known out-of-sync kB */ + down_read(&drbd_cfg_rwsem); idr_for_each_entry(&minors, mdev, i) { if (prev_i != i - 1) seq_printf(seq, "\n"); @@ -293,6 +294,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) } } } + up_read(&drbd_cfg_rwsem); return 0; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2b0b0ab90f2..fd3859407a0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -964,7 +964,10 @@ retry: if (drbd_send_protocol(tconn) == -EOPNOTSUPP) return -1; - return !idr_for_each(&tconn->volumes, drbd_connected, tconn); + down_read(&drbd_cfg_rwsem); + h = !idr_for_each(&tconn->volumes, drbd_connected, tconn); + up_read(&drbd_cfg_rwsem); + return h; out_release_sockets: if (tconn->data.socket) { @@ -4084,7 +4087,9 @@ static void drbd_disconnect(struct drbd_tconn *tconn) drbd_thread_stop(&tconn->asender); drbd_free_sock(tconn); + down_read(&drbd_cfg_rwsem); idr_for_each(&tconn->volumes, drbd_disconnected, tconn); + up_read(&drbd_cfg_rwsem); conn_info(tconn, "Connection closed\n"); if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) @@ -4821,10 +4826,14 @@ static int tconn_finish_peer_reqs(struct drbd_tconn *tconn) do { clear_bit(SIGNAL_ASENDER, &tconn->flags); flush_signals(current); + down_read(&drbd_cfg_rwsem); idr_for_each_entry(&tconn->volumes, mdev, i) { - if (drbd_finish_peer_reqs(mdev)) + if (drbd_finish_peer_reqs(mdev)) { + up_read(&drbd_cfg_rwsem); return 1; /* error */ + } } + up_read(&drbd_cfg_rwsem); set_bit(SIGNAL_ASENDER, &tconn->flags); spin_lock_irq(&tconn->req_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 410900eb2ff..02cdff2b081 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1731,12 +1731,14 @@ int drbd_worker(struct drbd_thread *thi) spin_unlock_irq(&tconn->data.work.q_lock); drbd_thread_stop(&tconn->receiver); + down_read(&drbd_cfg_rwsem); 
idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. * wait here for the exiting receiver. */ drbd_mdev_cleanup(mdev); } + up_read(&drbd_cfg_rwsem); clear_bit(OBJECT_DYING, &tconn->flags); clear_bit(CONFIG_PENDING, &tconn->flags); wake_up(&tconn->ping_wait); From 303d1448a048fb5b099babc5f41d0b1e22238778 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Apr 2011 16:24:47 -0700 Subject: [PATCH 324/609] drbd: Runtime changeable wire protocol The wire protocol is no longer a property that is negotiated between the two peers. It is now expressed with two bits (DP_SEND_WRITE_ACK and DP_SEND_RECEIVE_ACK) in each data packet. Therefore the primary node is free to change the wire protocol at any time without disconnect/reconnect. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++++ drivers/block/drbd/drbd_main.c | 8 +++++- drivers/block/drbd/drbd_receiver.c | 39 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 18 ++++++++++---- drivers/block/drbd/drbd_req.h | 8 ++++++ 5 files changed, 53 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2119d9b02eb..c57cedb55f8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -327,6 +327,8 @@ extern unsigned int drbd_header_size(struct drbd_tconn *tconn); #define DP_FUA 16 /* equals REQ_FUA */ #define DP_FLUSH 32 /* equals REQ_FLUSH */ #define DP_DISCARD 64 /* equals REQ_DISCARD */ +#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ +#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ struct p_data { u64 sector; /* 64 bits sector number */ @@ -656,6 +658,9 @@ enum { /* Conflicting local requests need to be restarted after this request */ __EE_RESTART_REQUESTS, + + /* The peer wants a write ACK for this (wire proto C) */ + 
__EE_SEND_WRITE_ACK, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) @@ -663,6 +668,7 @@ enum { #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) #define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) +#define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) /* flag bits per mdev */ enum { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f298f9c2dbd..d3e3c111cbc 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1681,6 +1681,12 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T) dp_flags |= DP_MAY_SET_IN_SYNC; + if (mdev->tconn->agreed_pro_version >= 100) { + if (req->rq_state & RQ_EXP_RECEIVE_ACK) + dp_flags |= DP_SEND_RECEIVE_ACK; + if (req->rq_state & RQ_EXP_WRITE_ACK) + dp_flags |= DP_SEND_WRITE_ACK; + } p->dp_flags = cpu_to_be32(dp_flags); if (dgs) drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, p + 1); @@ -1697,7 +1703,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * out ok after sending on this side, but does not fit on the * receiving side, we sure have detected corruption elsewhere. 
*/ - if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs) + if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs) err = _drbd_send_bio(mdev, req->master_bio); else err = _drbd_send_zc_bio(mdev, req->master_bio); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fd3859407a0..295707ec12b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1697,7 +1697,7 @@ static int e_end_block(struct drbd_work *w, int cancel) sector_t sector = peer_req->i.sector; int err = 0, pcmd; - if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) { + if (peer_req->flags & EE_SEND_WRITE_ACK) { if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { pcmd = (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T && @@ -2074,20 +2074,28 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) list_add(&peer_req->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->tconn->req_lock); - switch (mdev->tconn->net_conf->wire_protocol) { - case DRBD_PROT_C: + if (mdev->tconn->agreed_pro_version < 100) { + switch (mdev->tconn->net_conf->wire_protocol) { + case DRBD_PROT_C: + dp_flags |= DP_SEND_WRITE_ACK; + break; + case DRBD_PROT_B: + dp_flags |= DP_SEND_RECEIVE_ACK; + break; + } + } + + if (dp_flags & DP_SEND_WRITE_ACK) { + peer_req->flags |= EE_SEND_WRITE_ACK; inc_unacked(mdev); /* corresponding dec_unacked() in e_end_block() * respective _drbd_clear_done_ee */ - break; - case DRBD_PROT_B: + } + + if (dp_flags & DP_SEND_RECEIVE_ACK) { /* I really don't like it that the receiver thread * sends on the msock, but anyways */ drbd_send_ack(mdev, P_RECV_ACK, peer_req); - break; - case DRBD_PROT_A: - /* nothing to do */ - break; } if (mdev->state.pdsk < D_INCONSISTENT) { @@ -2932,7 +2940,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) if (cf & CF_DRY_RUN) set_bit(CONN_DRY_RUN, &tconn->flags); - if (p_proto != 
tconn->net_conf->wire_protocol) { + if (p_proto != tconn->net_conf->wire_protocol && tconn->agreed_pro_version < 100) { conn_err(tconn, "incompatible communication protocols\n"); goto disconnect; } @@ -4622,23 +4630,18 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) } switch (pi->cmd) { case P_RS_WRITE_ACK: - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER_AND_SIS; break; case P_WRITE_ACK: - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER; break; case P_RECV_ACK: - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B); what = RECV_ACKED_BY_PEER; break; case P_DISCARD_WRITE: - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = DISCARD_WRITE; break; case P_RETRY_WRITE: - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = POSTPONE_WRITE; break; default: @@ -4656,8 +4659,6 @@ static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); - bool missing_ok = tconn->net_conf->wire_protocol == DRBD_PROT_A || - tconn->net_conf->wire_protocol == DRBD_PROT_B; int err; mdev = vnr_to_mdev(tconn, pi->vnr); @@ -4674,15 +4675,13 @@ static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) err = validate_req_change_req_state(mdev, p->block_id, sector, &mdev->write_requests, __func__, - NEG_ACKED, missing_ok); + NEG_ACKED, true); if (err) { /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. The master bio might already be completed, therefore the request is no longer in the collision hash. */ /* In Protocol B we might already have got a P_RECV_ACK but then get a P_NEG_ACK afterwards. 
*/ - if (!missing_ok) - return err; drbd_set_out_of_sync(mdev, sector, size); } return 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index fd0b4529a4b..0f1a29fc722 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -323,7 +323,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { struct drbd_conf *mdev = req->w.mdev; - int rv = 0; + int p, rv = 0; if (m) m->bio = NULL; @@ -344,6 +344,10 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); req->rq_state |= RQ_NET_PENDING; + p = mdev->tconn->net_conf->wire_protocol; + req->rq_state |= + p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK : + p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0; inc_ap_pending(mdev); break; @@ -500,7 +504,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, atomic_add(req->i.size >> 9, &mdev->ap_in_flight); if (bio_data_dir(req->master_bio) == WRITE && - mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A) { + !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) { /* this is what is dangerous about protocol A: * pretend it was successfully written on the peer. */ if (req->rq_state & RQ_NET_PENDING) { @@ -550,6 +554,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_NET_DONE; /* fall through */ case WRITE_ACKED_BY_PEER: + D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); /* protocol C; successfully written on peer. * Nothing to do here. * We want to keep the tl in place for all protocols, to cater @@ -560,11 +565,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * request could set NET_DONE right here, and not wait for the * P_BARRIER_ACK, but that is an unnecessary optimization. 
*/ + goto ack_common; /* this makes it effectively the same as for: */ case RECV_ACKED_BY_PEER: + D_ASSERT(req->rq_state & RQ_EXP_RECEIVE_ACK); /* protocol B; pretends to be successfully written on peer. * see also notes above in HANDED_OVER_TO_NETWORK about * protocol != C */ + ack_common: req->rq_state |= RQ_NET_OK; D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); @@ -574,8 +582,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case POSTPONE_WRITE: - /* - * If this node has already detected the write conflict, the + D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); + /* If this node has already detected the write conflict, the * worker will be waiting on misc_wait. Wake it up once this * request has completed locally. */ @@ -646,7 +654,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; - if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A) + if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) atomic_sub(req->i.size>>9, &mdev->ap_in_flight); } _req_may_be_done(req, m); /* Allowed while state.susp */ diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 0dabfa9c82f..5135c95fbf8 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -198,6 +198,12 @@ enum drbd_req_state_bits { /* The peer has sent a retry ACK */ __RQ_POSTPONED, + + /* We expect a receive ACK (wire proto B) */ + __RQ_EXP_RECEIVE_ACK, + + /* We expect a write ACK (wite proto C) */ + __RQ_EXP_WRITE_ACK, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -219,6 +225,8 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) #define RQ_POSTPONED (1UL << __RQ_POSTPONED) +#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK) +#define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK) /* For waking up the frozen transfer log mod_req() has to return 
if the request should be counted in the epoch object*/ From cd64397c0b7be5050c4127aba242c5d0ae6acaed Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Apr 2011 18:00:59 -0700 Subject: [PATCH 325/609] drbd: Check consistency of net options when the get changed online Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 79 ++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index dbaffcaf8e1..40de384aade 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1665,6 +1665,45 @@ static bool conn_ov_running(struct drbd_tconn *tconn) return rv; } +static enum drbd_ret_code +check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) +{ + struct drbd_conf *mdev; + int i; + + if (new_conf->two_primaries && + (new_conf->wire_protocol != DRBD_PROT_C)) + return ERR_NOT_PROTO_C; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, i) { + if (get_ldev(mdev)) { + enum drbd_fencing_p fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { + rcu_read_unlock(); + return ERR_STONITH_AND_PROT_A; + } + } + if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { + rcu_read_unlock(); + return ERR_DISCARD; + } + if (!mdev->bitmap) { + if(drbd_bm_init(mdev)) { + rcu_read_unlock(); + return ERR_NOMEM; + } + } + } + rcu_read_unlock(); + + if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) + return ERR_CONG_NOT_PROTO_A; + + return NO_ERROR; +} + int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -1709,6 +1748,10 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) goto fail; } + retcode = check_net_options(tconn, new_conf); + if (retcode != NO_ERROR) + goto fail; + /* re-sync running */ rsr = conn_resync_running(tconn); if 
(rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) { @@ -1868,39 +1911,9 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } - if (new_conf->two_primaries - && (new_conf->wire_protocol != DRBD_PROT_C)) { - retcode = ERR_NOT_PROTO_C; + retcode = check_net_options(tconn, new_conf); + if (retcode != NO_ERROR) goto fail; - } - - rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, i) { - if (get_ldev(mdev)) { - enum drbd_fencing_p fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { - retcode = ERR_STONITH_AND_PROT_A; - goto fail_rcu_unlock; - } - } - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { - retcode = ERR_DISCARD; - goto fail_rcu_unlock; - } - if (!mdev->bitmap) { - if(drbd_bm_init(mdev)) { - retcode = ERR_NOMEM; - goto fail_rcu_unlock; - } - } - } - rcu_read_unlock(); - - if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { - retcode = ERR_CONG_NOT_PROTO_A; - goto fail; - } retcode = NO_ERROR; @@ -2020,8 +2033,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) drbd_adm_finish(info, retcode); return 0; -fail_rcu_unlock: - rcu_read_unlock(); fail: kfree(int_dig_in); kfree(int_dig_vv); From b032b6fa3528d6eed972db32257cb316a66e0dac Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Apr 2011 18:16:10 -0700 Subject: [PATCH 326/609] drbd: Allow online change of replication protocol only with agreed_pv >= 100 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++++ include/linux/drbd.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 40de384aade..d4b29fd603f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1671,6 +1671,11 @@ check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) struct drbd_conf *mdev; int i; + if 
(tconn->net_conf && tconn->agreed_pro_version < 100 && + tconn->cstate == C_WF_REPORT_PARAMS && + new_conf->wire_protocol != tconn->net_conf->wire_protocol) + return ERR_NEED_APV_100; + if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) return ERR_NOT_PROTO_C; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index fe8d6ba31bc..6c7c85d8fc4 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -160,6 +160,7 @@ enum drbd_ret_code { ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, + ERR_NEED_APV_100 = 163, /* insert new ones above this line */ AFTER_LAST_ERR_CODE From 44ed167da74825bfb7950d45a4f83bce3e84921c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 19 Apr 2011 17:10:19 +0200 Subject: [PATCH 327/609] drbd: rcu_read_lock() and rcu_dereference() for tconn->net_conf Removing the get_net_conf()/put_net_conf() calls Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 15 +- drivers/block/drbd/drbd_main.c | 78 +++++---- drivers/block/drbd/drbd_nl.c | 188 +++++++++++++--------- drivers/block/drbd/drbd_proc.c | 9 +- drivers/block/drbd/drbd_receiver.c | 247 ++++++++++++++++++++--------- drivers/block/drbd/drbd_req.c | 40 +++-- drivers/block/drbd/drbd_state.c | 14 +- drivers/block/drbd/drbd_worker.c | 26 ++- 8 files changed, 401 insertions(+), 216 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c57cedb55f8..99da54ceb87 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -832,7 +832,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct mutex cstate_mutex; /* Protects graceful disconnects */ unsigned long flags; - struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ + struct net_conf *net_conf; /* content protected by rcu */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; wait_queue_head_t ping_wait; /* 
Woken upon reception of a ping, and a state change */ @@ -2059,11 +2059,14 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, * maybe re-implement using semaphores? */ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) { - int mxb = 1000000; /* arbitrary limit on open requests */ - if (get_net_conf(mdev->tconn)) { - mxb = mdev->tconn->net_conf->max_buffers; - put_net_conf(mdev->tconn); - } + struct net_conf *nc; + int mxb; + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + mxb = nc ? nc->max_buffers : 1000000; /* arbitrary limit on open requests */ + rcu_read_unlock(); + return mxb; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d3e3c111cbc..8c1f93031c6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -843,15 +843,19 @@ int drbd_send_sync_param(struct drbd_conf *mdev) int size; const int apv = mdev->tconn->agreed_pro_version; enum drbd_packet cmd; + struct net_conf *nc; sock = &mdev->tconn->data; p = drbd_prepare_command(mdev, sock); if (!p) return -EIO; + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) - + strlen(mdev->tconn->net_conf->verify_alg) + 1 + + strlen(nc->verify_alg) + 1 : apv <= 94 ? 
sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); @@ -876,9 +880,10 @@ int drbd_send_sync_param(struct drbd_conf *mdev) } if (apv >= 88) - strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg); + strcpy(p->verify_alg, nc->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); + strcpy(p->csums_alg, nc->csums_alg); + rcu_read_unlock(); return drbd_send_command(mdev, sock, cmd, size, NULL, 0); } @@ -887,36 +892,44 @@ int drbd_send_protocol(struct drbd_tconn *tconn) { struct drbd_socket *sock; struct p_protocol *p; + struct net_conf *nc; int size, cf; - if (tconn->net_conf->dry_run && tconn->agreed_pro_version < 92) { - conn_err(tconn, "--dry-run is not supported by peer"); - return -EOPNOTSUPP; - } - sock = &tconn->data; p = conn_prepare_command(tconn, sock); if (!p) return -EIO; + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + + if (nc->dry_run && tconn->agreed_pro_version < 92) { + rcu_read_unlock(); + mutex_unlock(&sock->mutex); + conn_err(tconn, "--dry-run is not supported by peer"); + return -EOPNOTSUPP; + } + size = sizeof(*p); if (tconn->agreed_pro_version >= 87) - size += strlen(tconn->net_conf->integrity_alg) + 1; + size += strlen(nc->integrity_alg) + 1; - p->protocol = cpu_to_be32(tconn->net_conf->wire_protocol); - p->after_sb_0p = cpu_to_be32(tconn->net_conf->after_sb_0p); - p->after_sb_1p = cpu_to_be32(tconn->net_conf->after_sb_1p); - p->after_sb_2p = cpu_to_be32(tconn->net_conf->after_sb_2p); - p->two_primaries = cpu_to_be32(tconn->net_conf->two_primaries); + p->protocol = cpu_to_be32(nc->wire_protocol); + p->after_sb_0p = cpu_to_be32(nc->after_sb_0p); + p->after_sb_1p = cpu_to_be32(nc->after_sb_1p); + p->after_sb_2p = cpu_to_be32(nc->after_sb_2p); + p->two_primaries = cpu_to_be32(nc->two_primaries); cf = 0; - if (tconn->net_conf->want_lose) + if (nc->want_lose) cf |= CF_WANT_LOSE; - if (tconn->net_conf->dry_run) + if (nc->dry_run) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); 
if (tconn->agreed_pro_version >= 87) - strcpy(p->integrity_alg, tconn->net_conf->integrity_alg); + strcpy(p->integrity_alg, nc->integrity_alg); + rcu_read_unlock(); + return conn_send_command(tconn, sock, P_PROTOCOL, size, NULL, 0); } @@ -940,7 +953,9 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) mdev->comm_bm_set = drbd_bm_total_weight(mdev); p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); - uuid_flags |= mdev->tconn->net_conf->want_lose ? 1 : 0; + rcu_read_lock(); + uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->want_lose ? 1 : 0; + rcu_read_unlock(); uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); @@ -1136,12 +1151,14 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, unsigned long rl; unsigned len; unsigned toggle; - int bits; + int bits, use_rle; /* may we use this feature? */ - if ((mdev->tconn->net_conf->use_rle == 0) || - (mdev->tconn->agreed_pro_version < 90)) - return 0; + rcu_read_lock(); + use_rle = rcu_dereference(mdev->tconn->net_conf)->use_rle; + rcu_read_unlock(); + if (!use_rle || mdev->tconn->agreed_pro_version < 90) + return 0; if (c->bit_offset >= c->bm_bits) return 0; /* nothing to do. 
*/ @@ -1812,7 +1829,9 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (sock == tconn->data.socket) { - tconn->ko_count = tconn->net_conf->ko_count; + rcu_read_lock(); + tconn->ko_count = rcu_dereference(tconn->net_conf)->ko_count; + rcu_read_unlock(); drbd_update_congested(tconn); } do { @@ -3235,15 +3254,18 @@ const char *cmdname(enum drbd_packet cmd) */ int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i) { - struct net_conf *net_conf = mdev->tconn->net_conf; + struct net_conf *nc; DEFINE_WAIT(wait); long timeout; - if (!net_conf) + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (!nc) { + rcu_read_unlock(); return -ETIMEDOUT; - timeout = MAX_SCHEDULE_TIMEOUT; - if (net_conf->ko_count) - timeout = net_conf->timeout * HZ / 10 * net_conf->ko_count; + } + timeout = nc->ko_count ? nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT; + rcu_read_unlock(); /* Indicate to wake up mdev->misc_wait on progress. 
*/ i->waiting = true; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d4b29fd603f..34be84260be 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -257,27 +257,30 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) static void setup_khelper_env(struct drbd_tconn *tconn, char **envp) { char *afs; + struct net_conf *nc; - if (get_net_conf(tconn)) { - switch (((struct sockaddr *)tconn->net_conf->peer_addr)->sa_family) { + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + if (nc) { + switch (((struct sockaddr *)nc->peer_addr)->sa_family) { case AF_INET6: afs = "ipv6"; snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)tconn->net_conf->peer_addr)->sin6_addr); + &((struct sockaddr_in6 *)nc->peer_addr)->sin6_addr); break; case AF_INET: afs = "ipv4"; snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr); + &((struct sockaddr_in *)nc->peer_addr)->sin_addr); break; default: afs = "ssocks"; snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr); + &((struct sockaddr_in *)nc->peer_addr)->sin_addr); } snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); - put_net_conf(tconn); } + rcu_read_unlock(); } int drbd_khelper(struct drbd_conf *mdev, char *cmd) @@ -493,6 +496,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) { const int max_tries = 4; enum drbd_state_rv rv = SS_UNKNOWN_ERROR; + struct net_conf *nc; int try = 0; int forced = 0; union drbd_state mask, val; @@ -550,7 +554,12 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. 
*/ - schedule_timeout_interruptible((mdev->tconn->net_conf->ping_timeo+1)*HZ/10); + int timeo; + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); if (try < max_tries) try = max_tries - 1; continue; @@ -580,10 +589,11 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) put_ldev(mdev); } } else { - if (get_net_conf(mdev->tconn)) { - mdev->tconn->net_conf->want_lose = 0; - put_net_conf(mdev->tconn); - } + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) + nc->want_lose = 0; + rcu_read_unlock(); set_disk_ro(mdev->vdisk, false); if (get_ldev(mdev)) { if (((mdev->state.conn < C_CONNECTED || @@ -1193,6 +1203,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) struct lru_cache *resync_lru = NULL; union drbd_state ns, os; enum drbd_state_rv rv; + struct net_conf *nc; int cp_discovered = 0; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -1256,14 +1267,16 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - if (get_net_conf(mdev->tconn)) { - int prot = mdev->tconn->net_conf->wire_protocol; - put_net_conf(mdev->tconn); - if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) { + if (nbc->dc.fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { + rcu_read_unlock(); retcode = ERR_STONITH_AND_PROT_A; goto fail; } } + rcu_read_unlock(); bdev = blkdev_get_by_path(nbc->dc.backing_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); @@ -1666,42 +1679,30 @@ static bool conn_ov_running(struct drbd_tconn *tconn) } static enum drbd_ret_code -check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) +_check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct net_conf *new_conf) { struct drbd_conf *mdev; int i; - if (tconn->net_conf && 
tconn->agreed_pro_version < 100 && + if (old_conf && tconn->agreed_pro_version < 100 && tconn->cstate == C_WF_REPORT_PARAMS && - new_conf->wire_protocol != tconn->net_conf->wire_protocol) + new_conf->wire_protocol != old_conf->wire_protocol) return ERR_NEED_APV_100; if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) return ERR_NOT_PROTO_C; - rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, i) { if (get_ldev(mdev)) { enum drbd_fencing_p fp = mdev->ldev->dc.fencing; put_ldev(mdev); - if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { - rcu_read_unlock(); + if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) return ERR_STONITH_AND_PROT_A; - } } - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { - rcu_read_unlock(); + if (mdev->state.role == R_PRIMARY && new_conf->want_lose) return ERR_DISCARD; - } - if (!mdev->bitmap) { - if(drbd_bm_init(mdev)) { - rcu_read_unlock(); - return ERR_NOMEM; - } - } } - rcu_read_unlock(); if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) return ERR_CONG_NOT_PROTO_A; @@ -1709,11 +1710,33 @@ check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) return NO_ERROR; } +static enum drbd_ret_code +check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) +{ + static enum drbd_ret_code rv; + struct drbd_conf *mdev; + int i; + + rcu_read_lock(); + rv = _check_net_options(tconn, rcu_dereference(tconn->net_conf), new_conf); + rcu_read_unlock(); + + /* tconn->volumes protected by genl_lock() here */ + idr_for_each_entry(&tconn->volumes, mdev, i) { + if (!mdev->bitmap) { + if(drbd_bm_init(mdev)) + return ERR_NOMEM; + } + } + + return rv; +} + int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; struct drbd_tconn *tconn; - struct net_conf *new_conf = NULL; + struct net_conf *old_conf, *new_conf = NULL; int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ @@ 
-1735,17 +1758,20 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) goto out; } - /* we also need a net config - * to change the options on */ - if (!get_net_conf(tconn)) { - drbd_msg_put_info("net conf missing, try connect"); - retcode = ERR_INVALID_REQUEST; - goto out; - } - conn_reconfig_start(tconn); - memcpy(new_conf, tconn->net_conf, sizeof(*new_conf)); + rcu_read_lock(); + old_conf = rcu_dereference(tconn->net_conf); + + if (!old_conf) { + drbd_msg_put_info("net conf missing, try connect"); + retcode = ERR_INVALID_REQUEST; + goto fail_rcu_unlock; + } + + *new_conf = *old_conf; + rcu_read_unlock(); + err = net_conf_from_attrs_for_change(new_conf, info); if (err) { retcode = ERR_MANDATORY_TAG; @@ -1759,10 +1785,13 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) /* re-sync running */ rsr = conn_resync_running(tconn); - if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) { + rcu_read_lock(); + old_conf = rcu_dereference(tconn->net_conf); + if (rsr && old_conf && strcmp(new_conf->csums_alg, old_conf->csums_alg)) { retcode = ERR_CSUMS_RESYNC_RUNNING; - goto fail; + goto fail_rcu_unlock; } + rcu_read_unlock(); if (!rsr && new_conf->csums_alg[0]) { csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC); @@ -1780,12 +1809,15 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) /* online verify running */ ovr = conn_ov_running(tconn); - if (ovr) { - if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) { + rcu_read_lock(); + old_conf = rcu_dereference(tconn->net_conf); + if (ovr && old_conf) { + if (strcmp(new_conf->verify_alg, old_conf->verify_alg)) { retcode = ERR_VERIFY_RUNNING; - goto fail; + goto fail_rcu_unlock; } } + rcu_read_unlock(); if (!ovr && new_conf->verify_alg[0]) { verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC); @@ -1801,16 +1833,9 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) } } - - /* For now, use 
struct assignment, not pointer assignment. - * We don't have any means to determine who might still - * keep a local alias into the struct, - * so we cannot just free it and hope for the best :( - * FIXME - * To avoid someone looking at a half-updated struct, we probably - * should have a rw-semaphor on net_conf and disk_conf. - */ - *tconn->net_conf = *new_conf; + rcu_assign_pointer(tconn->net_conf, new_conf); + synchronize_rcu(); + kfree(old_conf); if (!rsr) { crypto_free_hash(tconn->csums_tfm); @@ -1826,11 +1851,12 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) if (tconn->cstate >= C_WF_REPORT_PARAMS) drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); + fail_rcu_unlock: + rcu_read_unlock(); fail: crypto_free_hash(csums_tfm); crypto_free_hash(verify_tfm); kfree(new_conf); - put_net_conf(tconn); conn_reconfig_done(tconn); out: drbd_adm_finish(info, retcode); @@ -1841,7 +1867,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { char hmac_name[CRYPTO_MAX_ALG_NAME]; struct drbd_conf *mdev; - struct net_conf *new_conf = NULL; + struct net_conf *old_conf, *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; @@ -1929,23 +1955,26 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) * strictly serialized on genl_lock(). 
We are protected against * concurrent reconfiguration/addition/deletion */ list_for_each_entry(oconn, &drbd_tconns, all_tconn) { + struct net_conf *nc; if (oconn == tconn) continue; - if (get_net_conf(oconn)) { - taken_addr = (struct sockaddr *)&oconn->net_conf->my_addr; - if (new_conf->my_addr_len == oconn->net_conf->my_addr_len && + + rcu_read_lock(); + nc = rcu_dereference(oconn->net_conf); + if (nc) { + taken_addr = (struct sockaddr *)&nc->my_addr; + if (new_conf->my_addr_len == nc->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) retcode = ERR_LOCAL_ADDR; - taken_addr = (struct sockaddr *)&oconn->net_conf->peer_addr; - if (new_conf->peer_addr_len == oconn->net_conf->peer_addr_len && + taken_addr = (struct sockaddr *)&nc->peer_addr; + if (new_conf->peer_addr_len == nc->peer_addr_len && !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = ERR_PEER_ADDR; - - put_net_conf(oconn); - if (retcode != NO_ERROR) - goto fail; } + rcu_read_unlock(); + if (retcode != NO_ERROR) + goto fail; } if (new_conf->cram_hmac_alg[0] != 0) { @@ -2004,12 +2033,15 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) conn_flush_workqueue(tconn); spin_lock_irq(&tconn->req_lock); - if (tconn->net_conf != NULL) { + rcu_read_lock(); + old_conf = rcu_dereference(tconn->net_conf); + if (old_conf != NULL) { retcode = ERR_NET_CONFIGURED; + rcu_read_unlock(); spin_unlock_irq(&tconn->req_lock); goto fail; } - tconn->net_conf = new_conf; + rcu_assign_pointer(tconn->net_conf, new_conf); crypto_free_hash(tconn->cram_hmac_tfm); tconn->cram_hmac_tfm = tfm; @@ -2464,9 +2496,9 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, const struct sib_info *sib) { struct state_info *si = NULL; /* for sizeof(si->member); */ + struct net_conf *nc; struct nlattr *nla; int got_ldev; - int got_net; int err = 0; int exclude_sensitive; @@ -2484,7 +2516,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, exclude_sensitive 
= sib || !capable(CAP_SYS_ADMIN); got_ldev = get_ldev(mdev); - got_net = get_net_conf(mdev->tconn); /* We need to add connection name and volume number information still. * Minor number is in drbd_genlmsghdr. */ @@ -2497,9 +2528,14 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (got_ldev) if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) goto nla_put_failure; - if (got_net) - if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) - goto nla_put_failure; + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) + err = net_conf_to_skb(skb, nc, exclude_sensitive); + rcu_read_unlock(); + if (err) + goto nla_put_failure; nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) @@ -2546,8 +2582,6 @@ nla_put_failure: err = -EMSGSIZE; if (got_ldev) put_ldev(mdev); - if (got_net) - put_net_conf(mdev->tconn); return err; } diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 4025d0883ba..792a71ec2e6 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -197,6 +197,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) int i, prev_i = -1; const char *sn; struct drbd_conf *mdev; + struct net_conf *nc; + char wp; static char write_ordering_chars[] = { [WO_none] = 'n', @@ -240,6 +242,10 @@ static int drbd_seq_show(struct seq_file *seq, void *v) mdev->state.role == R_SECONDARY) { seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; + rcu_read_unlock(); seq_printf(seq, "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n" " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " @@ -249,8 +255,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) drbd_role_str(mdev->state.peer), drbd_disk_str(mdev->state.disk), drbd_disk_str(mdev->state.pdsk), - (mdev->tconn->net_conf == NULL ? 
' ' : - (mdev->tconn->net_conf->wire_protocol - DRBD_PROT_A+'A')), + wp, drbd_suspended(mdev) ? 's' : 'r', mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 295707ec12b..59f9af96374 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -244,11 +244,18 @@ struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, bool retry) { struct page *page = NULL; + struct net_conf *nc; DEFINE_WAIT(wait); + int mxb; /* Yes, we may run up to @number over max_buffers. If we * follow it strictly, the admin will get it wrong anyways. */ - if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + mxb = nc ? nc->max_buffers : 1000000; + rcu_read_unlock(); + + if (atomic_read(&mdev->pp_in_use) < mxb) page = __drbd_alloc_pages(mdev, number); while (page == NULL) { @@ -256,7 +263,7 @@ struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, drbd_kick_lo_and_reclaim_net(mdev); - if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) { + if (atomic_read(&mdev->pp_in_use) < mxb) { page = __drbd_alloc_pages(mdev, number); if (page) break; @@ -607,24 +614,47 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) const char *what; struct socket *sock; struct sockaddr_in6 src_in6; - int err; + struct sockaddr_in6 peer_in6; + struct net_conf *nc; + int err, peer_addr_len, my_addr_len; + int sndbuf_size, rcvbuf_size, try_connect_int; int disconnect_on_error = 1; - if (!get_net_conf(tconn)) + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + if (!nc) { + rcu_read_unlock(); return NULL; + } + + sndbuf_size = nc->sndbuf_size; + rcvbuf_size = nc->rcvbuf_size; + try_connect_int = nc->try_connect_int; + + my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6)); + memcpy(&src_in6, nc->my_addr, 
my_addr_len); + + if (((struct sockaddr *)nc->my_addr)->sa_family == AF_INET6) + src_in6.sin6_port = 0; + else + ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ + + peer_addr_len = min_t(int, nc->peer_addr_len, sizeof(src_in6)); + memcpy(&peer_in6, nc->peer_addr, peer_addr_len); + + rcu_read_unlock(); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family, - SOCK_STREAM, IPPROTO_TCP, &sock); + err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; goto out; } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ; - drbd_setbufsize(sock, tconn->net_conf->sndbuf_size, - tconn->net_conf->rcvbuf_size); + sock->sk->sk_sndtimeo = try_connect_int * HZ; + drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -633,17 +663,8 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) * Make sure to use 0 as port number, so linux selects * a free one dynamically. */ - memcpy(&src_in6, tconn->net_conf->my_addr, - min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6))); - if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6) - src_in6.sin6_port = 0; - else - ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ - what = "bind before connect"; - err = sock->ops->bind(sock, - (struct sockaddr *) &src_in6, - tconn->net_conf->my_addr_len); + err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); if (err < 0) goto out; @@ -651,9 +672,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) * stay C_WF_CONNECTION, don't go Disconnecting! 
*/ disconnect_on_error = 0; what = "connect"; - err = sock->ops->connect(sock, - (struct sockaddr *)tconn->net_conf->peer_addr, - tconn->net_conf->peer_addr_len, 0); + err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); out: if (err < 0) { @@ -676,40 +695,52 @@ out: if (disconnect_on_error) conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } - put_net_conf(tconn); + return sock; } static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) { - int timeo, err; + int timeo, err, my_addr_len; + int sndbuf_size, rcvbuf_size, try_connect_int; struct socket *s_estab = NULL, *s_listen; + struct sockaddr_in6 my_addr; + struct net_conf *nc; const char *what; - if (!get_net_conf(tconn)) + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + if (!nc) { + rcu_read_unlock(); return NULL; + } + + sndbuf_size = nc->sndbuf_size; + rcvbuf_size = nc->rcvbuf_size; + try_connect_int = nc->try_connect_int; + + my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6)); + memcpy(&my_addr, nc->my_addr, my_addr_len); + rcu_read_unlock(); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; goto out; } - timeo = tconn->net_conf->try_connect_int * HZ; + timeo = try_connect_int * HZ; timeo += (random32() & 1) ? 
timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; - drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size, - tconn->net_conf->rcvbuf_size); + drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); what = "bind before listen"; - err = s_listen->ops->bind(s_listen, - (struct sockaddr *) tconn->net_conf->my_addr, - tconn->net_conf->my_addr_len); + err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); if (err < 0) goto out; @@ -724,7 +755,6 @@ out: conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } - put_net_conf(tconn); return s_estab; } @@ -817,7 +847,8 @@ int drbd_connected(int vnr, void *p, void *data) static int drbd_connect(struct drbd_tconn *tconn) { struct socket *sock, *msock; - int try, h, ok; + struct net_conf *nc; + int timeout, try, h, ok; if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; @@ -924,11 +955,17 @@ retry: * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * first set it to the P_CONNECTION_FEATURES timeout, * which we set to 4x the configured ping_timeout. */ - sock->sk->sk_sndtimeo = - sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10; + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); - msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; - msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; + sock->sk->sk_sndtimeo = + sock->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; + + msock->sk->sk_rcvtimeo = nc->ping_int*HZ; + timeout = nc->timeout * HZ / 10; + rcu_read_unlock(); + + msock->sk->sk_sndtimeo = timeout; /* we don't want delays. 
* we use TCP_CORK where appropriate, though */ @@ -956,7 +993,7 @@ retry: if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) return 0; - sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; + sock->sk->sk_sndtimeo = timeout; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; drbd_thread_start(&tconn->asender); @@ -1842,7 +1879,9 @@ static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_s } prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); spin_unlock(&mdev->peer_seq_lock); - timeout = mdev->tconn->net_conf->ping_timeo*HZ/10; + rcu_read_lock(); + timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10; + rcu_read_unlock(); timeout = schedule_timeout(timeout); spin_lock(&mdev->peer_seq_lock); if (!timeout) { @@ -2075,7 +2114,8 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) spin_unlock_irq(&mdev->tconn->req_lock); if (mdev->tconn->agreed_pro_version < 100) { - switch (mdev->tconn->net_conf->wire_protocol) { + rcu_read_lock(); + switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) { case DRBD_PROT_C: dp_flags |= DP_SEND_WRITE_ACK; break; @@ -2083,6 +2123,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) dp_flags |= DP_SEND_RECEIVE_ACK; break; } + rcu_read_unlock(); } if (dp_flags & DP_SEND_WRITE_ACK) { @@ -2385,6 +2426,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, rv = -100; unsigned long ch_self, ch_peer; + enum drbd_after_sb_p after_sb_0p; self = mdev->ldev->md.uuid[UI_BITMAP] & 1; peer = mdev->p_uuid[UI_BITMAP] & 1; @@ -2392,10 +2434,14 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) ch_peer = mdev->p_uuid[UI_SIZE]; ch_self = mdev->comm_bm_set; - switch (mdev->tconn->net_conf->after_sb_0p) { + rcu_read_lock(); + after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p; + rcu_read_unlock(); + switch (after_sb_0p) { case ASB_CONSENSUS: 
case ASB_DISCARD_SECONDARY: case ASB_CALL_HELPER: + case ASB_VIOLENTLY: dev_err(DEV, "Configuration error.\n"); break; case ASB_DISCONNECT: @@ -2431,7 +2477,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) if (ch_peer == 0) { rv = 1; break; } if (ch_self == 0) { rv = -1; break; } } - if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) + if (after_sb_0p == ASB_DISCARD_ZERO_CHG) break; case ASB_DISCARD_LEAST_CHG: if (ch_self < ch_peer) @@ -2456,13 +2502,18 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) { int hg, rv = -100; + enum drbd_after_sb_p after_sb_1p; - switch (mdev->tconn->net_conf->after_sb_1p) { + rcu_read_lock(); + after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p; + rcu_read_unlock(); + switch (after_sb_1p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: case ASB_DISCARD_LOCAL: case ASB_DISCARD_REMOTE: + case ASB_DISCARD_ZERO_CHG: dev_err(DEV, "Configuration error.\n"); break; case ASB_DISCONNECT: @@ -2505,8 +2556,12 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) { int hg, rv = -100; + enum drbd_after_sb_p after_sb_2p; - switch (mdev->tconn->net_conf->after_sb_2p) { + rcu_read_lock(); + after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p; + rcu_read_unlock(); + switch (after_sb_2p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: @@ -2514,6 +2569,7 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) case ASB_DISCARD_REMOTE: case ASB_CONSENSUS: case ASB_DISCARD_SECONDARY: + case ASB_DISCARD_ZERO_CHG: dev_err(DEV, "Configuration error.\n"); break; case ASB_VIOLENTLY: @@ -2758,9 +2814,10 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l static enum 
drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, enum drbd_disk_state peer_disk) __must_hold(local) { - int hg, rule_nr; enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; + struct net_conf *nc; + int hg, rule_nr, rr_conflict, dry_run; mydisk = mdev->state.disk; if (mydisk == D_NEGOTIATING) @@ -2797,7 +2854,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (abs(hg) == 100) drbd_khelper(mdev, "initial-split-brain"); - if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) { + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + + if (hg == 100 || (hg == -100 && nc->always_asbp)) { int pcount = (mdev->state.role == R_PRIMARY) + (peer_role == R_PRIMARY); int forced = (hg == -100); @@ -2826,9 +2886,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + if (nc->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + if (!nc->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -2836,6 +2896,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol "Sync from %s node\n", (hg < 0) ? "peer" : "this"); } + rr_conflict = nc->rr_conflict; + dry_run = nc->dry_run; + rcu_read_unlock(); if (hg == -100) { /* FIXME this log message is not correct if we end up here @@ -2854,7 +2917,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (hg < 0 && /* by intention we do not use mydisk here. 
*/ mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { - switch (mdev->tconn->net_conf->rr_conflict) { + switch (rr_conflict) { case ASB_CALL_HELPER: drbd_khelper(mdev, "pri-lost"); /* fall through */ @@ -2867,7 +2930,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { + if (dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else @@ -2926,6 +2989,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; + unsigned char *my_alg; + struct net_conf *nc; p_proto = be32_to_cpu(p->protocol); p_after_sb_0p = be32_to_cpu(p->after_sb_0p); @@ -2940,38 +3005,43 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) if (cf & CF_DRY_RUN) set_bit(CONN_DRY_RUN, &tconn->flags); - if (p_proto != tconn->net_conf->wire_protocol && tconn->agreed_pro_version < 100) { + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + + if (p_proto != nc->wire_protocol && tconn->agreed_pro_version < 100) { conn_err(tconn, "incompatible communication protocols\n"); - goto disconnect; + goto disconnect_rcu_unlock; } - if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) { + if (cmp_after_sb(p_after_sb_0p, nc->after_sb_0p)) { conn_err(tconn, "incompatible after-sb-0pri settings\n"); - goto disconnect; + goto disconnect_rcu_unlock; } - if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) { + if (cmp_after_sb(p_after_sb_1p, nc->after_sb_1p)) { conn_err(tconn, "incompatible after-sb-1pri settings\n"); - goto disconnect; + goto disconnect_rcu_unlock; } - if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) { + if 
(cmp_after_sb(p_after_sb_2p, nc->after_sb_2p)) { conn_err(tconn, "incompatible after-sb-2pri settings\n"); - goto disconnect; + goto disconnect_rcu_unlock; } - if (p_want_lose && tconn->net_conf->want_lose) { + if (p_want_lose && nc->want_lose) { conn_err(tconn, "both sides have the 'want_lose' flag set\n"); - goto disconnect; + goto disconnect_rcu_unlock; } - if (p_two_primaries != tconn->net_conf->two_primaries) { + if (p_two_primaries != nc->two_primaries) { conn_err(tconn, "incompatible setting of the two-primaries options\n"); - goto disconnect; + goto disconnect_rcu_unlock; } + my_alg = nc->integrity_alg; + rcu_read_unlock(); + if (tconn->agreed_pro_version >= 87) { - unsigned char *my_alg = tconn->net_conf->integrity_alg; int err; err = drbd_recv_all(tconn, p_integrity_alg, pi->size); @@ -2989,6 +3059,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) return 0; +disconnect_rcu_unlock: + rcu_read_unlock(); disconnect: conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; @@ -4322,19 +4394,26 @@ static int drbd_do_auth(struct drbd_tconn *tconn) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - unsigned int key_len = strlen(tconn->net_conf->shared_secret); + unsigned int key_len; + char secret[SHARED_SECRET_MAX]; /* 64 byte */ unsigned int resp_size; struct hash_desc desc; struct packet_info pi; + struct net_conf *nc; int err, rv; /* FIXME: Put the challenge/response into the preallocated socket buffer. 
*/ + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + key_len = strlen(nc->shared_secret); + memcpy(secret, nc->shared_secret, key_len); + rcu_read_unlock(); + desc.tfm = tconn->cram_hmac_tfm; desc.flags = 0; - rv = crypto_hash_setkey(tconn->cram_hmac_tfm, - (u8 *)tconn->net_conf->shared_secret, key_len); + rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len); if (rv) { conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv); rv = -1; @@ -4456,8 +4535,8 @@ static int drbd_do_auth(struct drbd_tconn *tconn) rv = !memcmp(response, right_response, resp_size); if (rv) - conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n", - resp_size, tconn->net_conf->cram_hmac_alg); + conn_info(tconn, "Peer authenticated using %d bytes HMAC\n", + resp_size); else rv = -1; @@ -4884,33 +4963,42 @@ int drbd_asender(struct drbd_thread *thi) int received = 0; unsigned int header_size = drbd_header_size(tconn); int expect = header_size; - int ping_timeout_active = 0; + bool ping_timeout_active = false; + struct net_conf *nc; + int ping_timeo, no_cork, ping_int; current->policy = SCHED_RR; /* Make this a realtime task! 
*/ current->rt_priority = 2; /* more important than all other tasks */ while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + ping_timeo = nc->ping_timeo; + no_cork = nc->no_cork; + ping_int = nc->ping_int; + rcu_read_unlock(); + if (test_and_clear_bit(SEND_PING, &tconn->flags)) { if (drbd_send_ping(tconn)) { conn_err(tconn, "drbd_send_ping has failed\n"); goto reconnect; } - tconn->meta.socket->sk->sk_rcvtimeo = - tconn->net_conf->ping_timeo*HZ/10; - ping_timeout_active = 1; + tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10; + ping_timeout_active = true; } /* TODO: conditionally cork; it may hurt latency if we cork without much to send */ - if (!tconn->net_conf->no_cork) + if (!no_cork) drbd_tcp_cork(tconn->meta.socket); if (tconn_finish_peer_reqs(tconn)) { conn_err(tconn, "tconn_finish_peer_reqs() failed\n"); goto reconnect; } /* but unconditionally uncork unless disabled */ - if (!tconn->net_conf->no_cork) + if (!no_cork) drbd_tcp_uncork(tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. 
*/ @@ -4984,10 +5072,11 @@ int drbd_asender(struct drbd_thread *thi) tconn->last_received = jiffies; - /* the idle_timeout (ping-int) - * has been restored in got_PingAck() */ - if (cmd == &asender_tbl[P_PING_ACK]) - ping_timeout_active = 0; + if (cmd == &asender_tbl[P_PING_ACK]) { + /* restore idle timeout */ + tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ; + ping_timeout_active = false; + } buf = tconn->meta.rbuf; received = 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 0f1a29fc722..c4e4553f5c2 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -323,6 +323,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { struct drbd_conf *mdev = req->w.mdev; + struct net_conf *nc; int p, rv = 0; if (m) @@ -344,7 +345,10 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); req->rq_state |= RQ_NET_PENDING; - p = mdev->tconn->net_conf->wire_protocol; + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + p = nc->wire_protocol; + rcu_read_unlock(); req->rq_state |= p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK : p == DRBD_PROT_B ? 
RQ_EXP_RECEIVE_ACK : 0; @@ -474,7 +478,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->tconn->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->tconn->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + p = nc->max_epoch_size; + rcu_read_unlock(); + if (mdev->tconn->newest_tle->n_writes >= p) queue_barrier(mdev); break; @@ -729,6 +737,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s const sector_t sector = bio->bi_sector; struct drbd_tl_epoch *b = NULL; struct drbd_request *req; + struct net_conf *nc; int local, remote, send_oos = 0; int err; int ret = 0; @@ -935,17 +944,19 @@ allocate_barrier: if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) _req_mod(req, QUEUE_FOR_SEND_OOS); + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); if (remote && - mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) { + nc->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) { int congested = 0; - if (mdev->tconn->net_conf->cong_fill && - atomic_read(&mdev->ap_in_flight) >= mdev->tconn->net_conf->cong_fill) { + if (nc->cong_fill && + atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) { dev_info(DEV, "Congestion-fill threshold reached\n"); congested = 1; } - if (mdev->act_log->used >= mdev->tconn->net_conf->cong_extents) { + if (mdev->act_log->used >= nc->cong_extents) { dev_info(DEV, "Congestion-extents threshold reached\n"); congested = 1; } @@ -953,12 +964,13 @@ allocate_barrier: if (congested) { queue_barrier(mdev); /* last barrier, after mirrored writes */ - if (mdev->tconn->net_conf->on_congestion == OC_PULL_AHEAD) + if (nc->on_congestion == OC_PULL_AHEAD) _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); - else /*mdev->tconn->net_conf->on_congestion == OC_DISCONNECT */ + else /*nc->on_congestion == 
OC_DISCONNECT */ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); } } + rcu_read_unlock(); spin_unlock_irq(&mdev->tconn->req_lock); kfree(b); /* if someone else has beaten us to it... */ @@ -1058,12 +1070,14 @@ void request_timer_fn(unsigned long data) struct drbd_tconn *tconn = mdev->tconn; struct drbd_request *req; /* oldest request */ struct list_head *le; - unsigned long et = 0; /* effective timeout = ko_count * timeout */ + struct net_conf *nc; + unsigned long et; /* effective timeout = ko_count * timeout */ + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + rcu_read_unlock(); - if (get_net_conf(tconn)) { - et = tconn->net_conf->timeout*HZ/10 * tconn->net_conf->ko_count; - put_net_conf(tconn); - } if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) return; /* Recurring timer stopped */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 52ebd9a9b03..f20a4a3807e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -482,6 +482,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) enum drbd_fencing_p fp; enum drbd_state_rv rv = SS_SUCCESS; + struct net_conf *nc; fp = FP_DONT_CARE; if (get_ldev(mdev)) { @@ -489,14 +490,15 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) put_ldev(mdev); } - if (get_net_conf(mdev->tconn)) { - if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) { + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) { + if (!nc->two_primaries && ns.role == R_PRIMARY) { if (ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; else if (conn_highest_peer(mdev->tconn) == R_PRIMARY) rv = SS_O_VOL_PEER_PRI; - } - put_net_conf(mdev->tconn); + } } if (rv <= 0) @@ -531,7 +533,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_CONNECTED_OUTDATES; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->tconn->net_conf->verify_alg[0] == 
0)) + (nc->verify_alg[0] == 0)) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && @@ -541,6 +543,8 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) rv = SS_CONNECTED_OUTDATES; + rcu_read_unlock(); + return rv; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 02cdff2b081..2c43cf0918c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1619,10 +1619,16 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) * detect connection loss, then waiting for a ping * response (implicit in drbd_resync_finished) reduces * the race considerably, but does not solve it. */ - if (side == C_SYNC_SOURCE) - schedule_timeout_interruptible( - mdev->tconn->net_conf->ping_int * HZ + - mdev->tconn->net_conf->ping_timeo*HZ/9); + if (side == C_SYNC_SOURCE) { + struct net_conf *nc; + int timeo; + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); + } drbd_resync_finished(mdev); } @@ -1645,22 +1651,30 @@ int drbd_worker(struct drbd_thread *thi) struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; struct drbd_conf *mdev; + struct net_conf *nc; LIST_HEAD(work_list); int vnr, intr = 0; + int cork; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); if (down_trylock(&tconn->data.work.s)) { mutex_lock(&tconn->data.mutex); - if (tconn->data.socket && !tconn->net_conf->no_cork) + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + cork = nc ? 
!nc->no_cork : 0; + rcu_read_unlock(); + + if (tconn->data.socket && cork) drbd_tcp_uncork(tconn->data.socket); mutex_unlock(&tconn->data.mutex); intr = down_interruptible(&tconn->data.work.s); mutex_lock(&tconn->data.mutex); - if (tconn->data.socket && !tconn->net_conf->no_cork) + if (tconn->data.socket && cork) drbd_tcp_cork(tconn->data.socket); mutex_unlock(&tconn->data.mutex); } From 91fd4dad64ce7ac48c4c30c7756c6d3c41e8ad0a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 20 Apr 2011 17:47:29 +0200 Subject: [PATCH 328/609] drbd: Proper locking for updates to net_conf under RCU Removing the get_net_conf()/put_net_conf() functions Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 29 ++-------- drivers/block/drbd/drbd_main.c | 17 +++++- drivers/block/drbd/drbd_nl.c | 68 +++++++++++------------- drivers/block/drbd/drbd_receiver.c | 85 +++++++++++++++++++----------- 4 files changed, 102 insertions(+), 97 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 99da54ceb87..83e6cadbe7a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -833,9 +833,8 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned long flags; struct net_conf *net_conf; /* content protected by rcu */ - atomic_t net_cnt; /* Users of net_conf */ - wait_queue_head_t net_cnt_wait; - wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ + struct mutex net_conf_update; /* mutex for ready-copy-update of net_conf */ + wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ struct res_opts res_opts; struct drbd_socket data; /* data/barrier/cstate/parameter packets */ @@ -1379,6 +1378,7 @@ extern void drbd_delete_device(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(const char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); struct drbd_tconn *conn_by_name(const char *name); 
+extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; @@ -1935,29 +1935,6 @@ static inline void _sub_unacked(struct drbd_conf *mdev, int n, const char *func, ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } -static inline void put_net_conf(struct drbd_tconn *tconn) -{ - if (atomic_dec_and_test(&tconn->net_cnt)) - wake_up(&tconn->net_cnt_wait); -} - -/** - * get_net_conf() - Increase ref count on mdev->tconn->net_conf; Returns 0 if nothing there - * @mdev: DRBD device. - * - * You have to call put_net_conf() when finished working with mdev->tconn->net_conf. - */ -static inline int get_net_conf(struct drbd_tconn *tconn) -{ - int have_net_conf; - - atomic_inc(&tconn->net_cnt); - have_net_conf = tconn->cstate >= C_UNCONNECTED; - if (!have_net_conf) - put_net_conf(tconn); - return have_net_conf; -} - /** * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev * @M: DRBD device. diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8c1f93031c6..ba9a8b7afed 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2385,6 +2385,20 @@ static void drbd_free_socket(struct drbd_socket *socket) free_page((unsigned long) socket->rbuf); } +void conn_free_crypto(struct drbd_tconn *tconn) +{ + crypto_free_hash(tconn->cram_hmac_tfm); + crypto_free_hash(tconn->integrity_w_tfm); + crypto_free_hash(tconn->integrity_r_tfm); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + tconn->cram_hmac_tfm = NULL; + tconn->integrity_w_tfm = NULL; + tconn->integrity_r_tfm = NULL; + tconn->int_dig_in = NULL; + tconn->int_dig_vv = NULL; +} + struct drbd_tconn *drbd_new_tconn(const char *name) { struct drbd_tconn *tconn; @@ -2411,8 +2425,7 @@ struct drbd_tconn *drbd_new_tconn(const char *name) tconn->cstate = C_STANDALONE; mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); - atomic_set(&tconn->net_cnt, 0); - init_waitqueue_head(&tconn->net_cnt_wait); + 
mutex_init(&tconn->net_conf_update); init_waitqueue_head(&tconn->ping_wait); idr_init(&tconn->volumes); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 34be84260be..f86e882efca 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -589,11 +589,12 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) put_ldev(mdev); } } else { - rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + mutex_lock(&mdev->tconn->net_conf_update); + nc = mdev->tconn->net_conf; if (nc) - nc->want_lose = 0; - rcu_read_unlock(); + nc->want_lose = 0; /* without copy; single bit op is atomic */ + mutex_unlock(&mdev->tconn->net_conf_update); + set_disk_ro(mdev->vdisk, false); if (get_ldev(mdev)) { if (((mdev->state.conn < C_CONNECTED || @@ -1760,17 +1761,16 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) conn_reconfig_start(tconn); - rcu_read_lock(); - old_conf = rcu_dereference(tconn->net_conf); + mutex_lock(&tconn->net_conf_update); + old_conf = tconn->net_conf; if (!old_conf) { drbd_msg_put_info("net conf missing, try connect"); retcode = ERR_INVALID_REQUEST; - goto fail_rcu_unlock; + goto fail; } *new_conf = *old_conf; - rcu_read_unlock(); err = net_conf_from_attrs_for_change(new_conf, info); if (err) { @@ -1785,13 +1785,10 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) /* re-sync running */ rsr = conn_resync_running(tconn); - rcu_read_lock(); - old_conf = rcu_dereference(tconn->net_conf); if (rsr && old_conf && strcmp(new_conf->csums_alg, old_conf->csums_alg)) { retcode = ERR_CSUMS_RESYNC_RUNNING; - goto fail_rcu_unlock; + goto fail; } - rcu_read_unlock(); if (!rsr && new_conf->csums_alg[0]) { csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC); @@ -1809,15 +1806,12 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) /* online verify running */ ovr = conn_ov_running(tconn); - rcu_read_lock(); - old_conf = 
rcu_dereference(tconn->net_conf); - if (ovr && old_conf) { + if (ovr) { if (strcmp(new_conf->verify_alg, old_conf->verify_alg)) { retcode = ERR_VERIFY_RUNNING; - goto fail_rcu_unlock; + goto fail; } } - rcu_read_unlock(); if (!ovr && new_conf->verify_alg[0]) { verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC); @@ -1834,8 +1828,6 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) } rcu_assign_pointer(tconn->net_conf, new_conf); - synchronize_rcu(); - kfree(old_conf); if (!rsr) { crypto_free_hash(tconn->csums_tfm); @@ -1848,15 +1840,21 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) verify_tfm = NULL; } + mutex_unlock(&tconn->net_conf_update); + synchronize_rcu(); + kfree(old_conf); + if (tconn->cstate >= C_WF_REPORT_PARAMS) drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); - fail_rcu_unlock: - rcu_read_unlock(); + goto done; + fail: + mutex_unlock(&tconn->net_conf_update); crypto_free_hash(csums_tfm); crypto_free_hash(verify_tfm); kfree(new_conf); + done: conn_reconfig_done(tconn); out: drbd_adm_finish(info, retcode); @@ -2032,32 +2030,26 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } conn_flush_workqueue(tconn); - spin_lock_irq(&tconn->req_lock); - rcu_read_lock(); - old_conf = rcu_dereference(tconn->net_conf); - if (old_conf != NULL) { + + mutex_lock(&tconn->net_conf_update); + old_conf = tconn->net_conf; + if (old_conf) { retcode = ERR_NET_CONFIGURED; - rcu_read_unlock(); - spin_unlock_irq(&tconn->req_lock); + mutex_unlock(&tconn->net_conf_update); goto fail; } rcu_assign_pointer(tconn->net_conf, new_conf); - crypto_free_hash(tconn->cram_hmac_tfm); + conn_free_crypto(tconn); tconn->cram_hmac_tfm = tfm; - - crypto_free_hash(tconn->integrity_w_tfm); tconn->integrity_w_tfm = integrity_w_tfm; - - crypto_free_hash(tconn->integrity_r_tfm); tconn->integrity_r_tfm = integrity_r_tfm; + tconn->int_dig_in = int_dig_in; + tconn->int_dig_vv = int_dig_vv; - 
kfree(tconn->int_dig_in); - kfree(tconn->int_dig_vv); - tconn->int_dig_in=int_dig_in; - tconn->int_dig_vv=int_dig_vv; - retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); - spin_unlock_irq(&tconn->req_lock); + mutex_unlock(&tconn->net_conf_update); + + retcode = conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, i) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 59f9af96374..397f9770198 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3138,6 +3138,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; + struct net_conf *old_conf, *new_conf = NULL; const int apv = tconn->agreed_pro_version; int *rs_plan_s = NULL; int fifo_size = 0; @@ -3212,10 +3213,13 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) { + mutex_lock(&mdev->tconn->net_conf_update); + old_conf = mdev->tconn->net_conf; + + if (strcmp(old_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->tconn->net_conf->verify_alg, p->verify_alg); + old_conf->verify_alg, p->verify_alg); goto disconnect; } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3226,10 +3230,10 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } } - if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) { + if (apv >= 89 && strcmp(old_conf->csums_alg, p->csums_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", - mdev->tconn->net_conf->csums_alg, p->csums_alg); + old_conf->csums_alg, p->csums_alg); goto disconnect; } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3259,22 +3263,38 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) put_ldev(mdev); } + if (verify_tfm || csums_tfm) { + new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + dev_err(DEV, "Allocation of new net_conf failed\n"); + goto disconnect; + } + + *new_conf = *old_conf; + + if (verify_tfm) { + strcpy(new_conf->verify_alg, p->verify_alg); + new_conf->verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = verify_tfm; + dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); + } + if (csums_tfm) { + strcpy(new_conf->csums_alg, p->csums_alg); + new_conf->csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = csums_tfm; + dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); + } + rcu_assign_pointer(tconn->net_conf, new_conf); + } + mutex_unlock(&mdev->tconn->net_conf_update); + if (new_conf) { + synchronize_rcu(); + kfree(old_conf); + } + spin_lock(&mdev->peer_seq_lock); - /* lock against drbd_nl_syncer_conf() */ - if (verify_tfm) { - strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg); - mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_hash(mdev->tconn->verify_tfm); - mdev->tconn->verify_tfm = verify_tfm; - dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); - } - if (csums_tfm) { - strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg); - mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1; - crypto_free_hash(mdev->tconn->csums_tfm); - mdev->tconn->csums_tfm = csums_tfm; - dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); - } if (fifo_size != mdev->rs_plan_s.size) { kfree(mdev->rs_plan_s.values); mdev->rs_plan_s.values = rs_plan_s; @@ 
-3286,6 +3306,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) return 0; disconnect: + mutex_unlock(&mdev->tconn->net_conf_update); /* just for completeness: actually not needed, * as this is not reached if csums_tfm was ok. */ crypto_free_hash(csums_tfm); @@ -3715,7 +3736,9 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } } - mdev->tconn->net_conf->want_lose = 0; + mutex_lock(&mdev->tconn->net_conf_update); + mdev->tconn->net_conf->want_lose = 0; /* without copy; single bit op is atomic */ + mutex_unlock(&mdev->tconn->net_conf_update); drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ @@ -4183,13 +4206,17 @@ static void drbd_disconnect(struct drbd_tconn *tconn) spin_unlock_irq(&tconn->req_lock); if (oc == C_DISCONNECTING) { - wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0); + struct net_conf *old_conf; - crypto_free_hash(tconn->cram_hmac_tfm); - tconn->cram_hmac_tfm = NULL; + mutex_lock(&tconn->net_conf_update); + old_conf = tconn->net_conf; + rcu_assign_pointer(tconn->net_conf, NULL); + conn_free_crypto(tconn); + mutex_unlock(&tconn->net_conf_update); + + synchronize_rcu(); + kfree(old_conf); - kfree(tconn->net_conf); - tconn->net_conf = NULL; conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE); } } @@ -4568,12 +4595,8 @@ int drbdd_init(struct drbd_thread *thi) } } while (h == 0); - if (h > 0) { - if (get_net_conf(tconn)) { - drbdd(tconn); - put_net_conf(tconn); - } - } + if (h > 0) + drbdd(tconn); drbd_disconnect(tconn); From 302bdeae49842cbd2faec8203f49b1c4ef20294d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 21 Apr 2011 11:36:49 +0200 Subject: [PATCH 329/609] drbd: Considering that the two_primaries config flag can change Now since it is possible to change the two_primaries config flag while the connection is up, make sure we treat a peer_req in a consistent way if the config flag changes while the peer_req is under IO. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++++ drivers/block/drbd/drbd_receiver.c | 19 ++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 83e6cadbe7a..3833d56b8de 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -661,6 +661,9 @@ enum { /* The peer wants a write ACK for this (wire proto C) */ __EE_SEND_WRITE_ACK, + + /* Is set when net_conf had two_primaries set while creating this peer_req */ + __EE_IN_INTERVAL_TREE, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) @@ -669,6 +672,7 @@ enum { #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) #define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) #define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) +#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) /* flag bits per mdev */ enum { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 397f9770198..4665ad79b4a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1752,7 +1752,7 @@ static int e_end_block(struct drbd_work *w, int cancel) } /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ - if (mdev->tconn->net_conf->two_primaries) { + if (peer_req->flags & EE_IN_INTERVAL_TREE) { spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&peer_req->i)); drbd_remove_epoch_entry_interval(mdev, peer_req); @@ -1811,14 +1811,19 @@ static u32 seq_max(u32 a, u32 b) static bool need_peer_seq(struct drbd_conf *mdev) { struct drbd_tconn *tconn = mdev->tconn; + int tp; /* * We only need to keep track of the last packet_seq number of our peer * if we are in dual-primary mode and we have the discard flag set; see * handle_write_conflicts(). 
*/ - return tconn->net_conf->two_primaries && - test_bit(DISCARD_CONCURRENT, &tconn->flags); + + rcu_read_lock(); + tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; + rcu_read_unlock(); + + return tp && test_bit(DISCARD_CONCURRENT, &tconn->flags); } static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) @@ -2049,7 +2054,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) u32 peer_seq = be32_to_cpu(p->seq_num); int rw = WRITE; u32 dp_flags; - int err; + int err, tp; mdev = vnr_to_mdev(tconn, pi->vnr); if (!mdev) @@ -2094,7 +2099,11 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) atomic_inc(&peer_req->epoch->active); spin_unlock(&mdev->epoch_lock); - if (mdev->tconn->net_conf->two_primaries) { + rcu_read_lock(); + tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; + rcu_read_unlock(); + if (tp) { + peer_req->flags |= EE_IN_INTERVAL_TREE; err = wait_for_and_update_peer_seq(mdev, peer_seq); if (err) goto out_interrupted; From 71932efc1cfccfe1cc8e48b21f8cea5fbbc80e24 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 18 Apr 2011 09:43:25 +0200 Subject: [PATCH 330/609] drbd: allow status dump request all volumes of a specific resource We had drbd_adm_get_status (one single volume), and drbd_adm_get_status_all (dump of all volumes of all resources). This enhances the latter to be able to dump all volumes of just one specific resource. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 70 +++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f86e882efca..fff11ae79f1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2598,7 +2598,7 @@ out: return 0; } -int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) +int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) { struct drbd_conf *mdev; struct drbd_genlmsghdr *dh; @@ -2616,6 +2616,9 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) * where tconn is cb->args[0]; * and i is cb->args[1]; * + * cb->args[2] indicates if we shall loop over all resources, + * or just dump all volumes of a single resource. + * * This may miss entries inserted after this dump started, * or entries deleted before they are reached. * @@ -2626,7 +2629,6 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) /* synchronize with drbd_new_tconn/drbd_free_tconn */ down_read(&drbd_cfg_rwsem); -next_tconn: /* revalidate iterator position */ list_for_each_entry(tmp, &drbd_tconns, all_tconn) { if (pos == NULL) { @@ -2641,16 +2643,22 @@ next_tconn: } } if (tconn) { +next_tconn: mdev = idr_get_next(&tconn->volumes, &volume); if (!mdev) { /* No more volumes to dump on this tconn. * Advance tconn iterator. */ pos = list_entry(tconn->all_tconn.next, struct drbd_tconn, all_tconn); - /* But, did we dump any volume on this tconn yet? */ + /* Did we dump any volume on this tconn yet? */ if (volume != 0) { - tconn = NULL; + /* If we reached the end of the list, + * or only a single resource dump was requested, + * we are done. 
*/ + if (&pos->all_tconn == &drbd_tconns || cb->args[2]) + goto out; volume = 0; + tconn = pos; goto next_tconn; } } @@ -2696,6 +2704,60 @@ out: return skb->len; } +/* + * Request status of all resources, or of all volumes within a single resource. + * + * This is a dump, as the answer may not fit in a single reply skb otherwise. + * Which means we cannot use the family->attrbuf or other such members, because + * dump is NOT protected by the genl_lock(). During dump, we only have access + * to the incoming skb, and need to opencode "parsing" of the nlattr payload. + * + * Once things are setup properly, we call into get_one_status(). + */ +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) +{ + const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; + struct nlattr *nla; + const char *conn_name; + struct drbd_tconn *tconn; + + /* Is this a followup call? */ + if (cb->args[0]) { + /* ... of a single resource dump, + * and the resource iterator has been advanced already? */ + if (cb->args[2] && cb->args[2] != cb->args[0]) + return 0; /* DONE. */ + goto dump; + } + + /* First call (from netlink_dump_start). We need to figure out + * which resource(s) the user wants us to dump. */ + nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen), + nlmsg_attrlen(cb->nlh, hdrlen), + DRBD_NLA_CFG_CONTEXT); + + /* No explicit context given. Dump all. */ + if (!nla) + goto dump; + nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); + /* context given, but no name present? */ + if (!nla) + return -EINVAL; + conn_name = nla_data(nla); + tconn = conn_by_name(conn_name); + if (!tconn) + return -ENODEV; + + /* prime iterators, and set "filter" mode mark: + * only dump this tconn. */ + cb->args[0] = (long)tconn; + /* cb->args[1] = 0; passed in this way. 
*/ + cb->args[2] = (long)tconn; + +dump: + return get_one_status(skb, cb); +} + int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; From ed439848ca6029c748b537e510a1ed5a506ea123 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sat, 23 Apr 2011 14:45:14 +0200 Subject: [PATCH 331/609] drbd: fix setsockopt for user mode linux We use our own copy of kernel_setsockopt, and did not mess around with get_fs/set_fs, since we thought we knew we would always be KERNEL_DS anyways. Apparently not so for at least user mode linux, so put the set_fs(KERNEL_DS) in there. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3833d56b8de..30801922a97 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1484,46 +1484,53 @@ static inline void drbd_flush_workqueue(struct drbd_conf *mdev) conn_flush_workqueue(mdev->tconn); } -/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to - * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ +/* Yes, there is kernel_setsockopt, but only since 2.6.18. + * So we have our own copy of it here. 
*/ static inline int drbd_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, int optlen) + char *optval, int optlen) { + mm_segment_t oldfs = get_fs(); + char __user *uoptval; int err; + + uoptval = (char __user __force *)optval; + + set_fs(KERNEL_DS); if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, optval, optlen); + err = sock_setsockopt(sock, level, optname, uoptval, optlen); else - err = sock->ops->setsockopt(sock, level, optname, optval, + err = sock->ops->setsockopt(sock, level, optname, uoptval, optlen); + set_fs(oldfs); return err; } static inline void drbd_tcp_cork(struct socket *sock) { - int __user val = 1; + int val = 1; (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } static inline void drbd_tcp_uncork(struct socket *sock) { - int __user val = 0; + int val = 0; (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } static inline void drbd_tcp_nodelay(struct socket *sock) { - int __user val = 1; + int val = 1; (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } static inline void drbd_tcp_quickack(struct socket *sock) { - int __user val = 2; + int val = 2; (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); From ae25b336e0e00eafd61f1cc1c3e9e1c0d2b8fc51 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sun, 24 Apr 2011 00:01:16 +0200 Subject: [PATCH 332/609] drbd: cmdname() enum to string convertion was missing a few constants Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 
ba9a8b7afed..bfea92297f6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3246,8 +3246,17 @@ const char *cmdname(enum drbd_packet cmd) [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", [P_RETRY_WRITE] = "RetryWrite", + [P_RS_CANCEL] = "RSCancel", + [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", + [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", + + /* enum drbd_packet, but not commands - obsoleted flags: + * P_MAY_IGNORE + * P_MAX_OPT_CMD + */ }; + /* too big for the array: 0xfffX */ if (cmd == P_INITIAL_META) return "InitialMeta"; if (cmd == P_INITIAL_DATA) From f5e2b8b3b6bed8c60103b4ed5341af072129d7c0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sun, 24 Apr 2011 20:52:20 +0200 Subject: [PATCH 333/609] drbd: move comment about stopping the receiver thread to where it belongs When the last volume of a replication group is unconfigured, the worker thread exits. To not interfere with cleanup of other threads, before the last cleanups run, we need to make sure the receiver has already exited. The comment explaining that clearly belongs above drbd_thread_stop(&tconn->receiver), not in the cleanup loop below. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2c43cf0918c..9d1ba8ea681 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1744,12 +1744,13 @@ int drbd_worker(struct drbd_thread *thi) */ spin_unlock_irq(&tconn->data.work.q_lock); + /* _drbd_set_state only uses stop_nowait. + * wait here for the exiting receiver. */ drbd_thread_stop(&tconn->receiver); + down_read(&drbd_cfg_rwsem); idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); - /* _drbd_set_state only uses stop_nowait. - * wait here for the exiting receiver.
*/ drbd_mdev_cleanup(mdev); } up_read(&drbd_cfg_rwsem); From 1d041225999c6f6246494b02d8f9b405155b52e1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 22 Apr 2011 15:20:23 +0200 Subject: [PATCH 334/609] drbd: Eliminated drbd_free_resources(); it is superseded by conn_free_crypto() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 35 ++++++++++++---------------------- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 30801922a97..0fb3fc32a99 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1046,7 +1046,6 @@ extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); #define drbd_thread_current_set_cpu(A) ({}) #define drbd_calc_cpu_mask(A) ({}) #endif -extern void drbd_free_resources(struct drbd_conf *mdev); extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_tconn *); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bfea92297f6..b9c103f16ae 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2057,7 +2057,9 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) drbd_bm_cleanup(mdev); } - drbd_free_resources(mdev); + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL; + clear_bit(AL_SUSPENDED, &mdev->flags); D_ASSERT(list_empty(&mdev->active_ee)); @@ -2252,7 +2254,8 @@ void drbd_delete_device(struct drbd_conf *mdev) if (mdev->this_bdev) bdput(mdev->this_bdev); - drbd_free_resources(mdev); + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL; drbd_release_all_peer_reqs(mdev); @@ -2387,11 +2390,18 @@ static void drbd_free_socket(struct drbd_socket *socket) void conn_free_crypto(struct drbd_tconn *tconn) { + drbd_free_sock(tconn); + + crypto_free_hash(tconn->csums_tfm); + crypto_free_hash(tconn->verify_tfm);
crypto_free_hash(tconn->cram_hmac_tfm); crypto_free_hash(tconn->integrity_w_tfm); crypto_free_hash(tconn->integrity_r_tfm); kfree(tconn->int_dig_in); kfree(tconn->int_dig_vv); + + tconn->csums_tfm = NULL; + tconn->verify_tfm = NULL; tconn->cram_hmac_tfm = NULL; tconn->integrity_w_tfm = NULL; tconn->integrity_r_tfm = NULL; @@ -2700,27 +2710,6 @@ void drbd_free_sock(struct drbd_tconn *tconn) } } - -void drbd_free_resources(struct drbd_conf *mdev) -{ - crypto_free_hash(mdev->tconn->csums_tfm); - mdev->tconn->csums_tfm = NULL; - crypto_free_hash(mdev->tconn->verify_tfm); - mdev->tconn->verify_tfm = NULL; - crypto_free_hash(mdev->tconn->cram_hmac_tfm); - mdev->tconn->cram_hmac_tfm = NULL; - crypto_free_hash(mdev->tconn->integrity_w_tfm); - mdev->tconn->integrity_w_tfm = NULL; - crypto_free_hash(mdev->tconn->integrity_r_tfm); - mdev->tconn->integrity_r_tfm = NULL; - - drbd_free_sock(mdev->tconn); - - __no_warn(local, - drbd_free_bc(mdev->ldev); - mdev->ldev = NULL;); -} - /* meta data management */ struct meta_data_on_disk { From 9dc9fbb35733c8ea97fe9b1cfc5499c7a625805c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 22 Apr 2011 15:23:32 +0200 Subject: [PATCH 335/609] drbd: Basic refcounting for drbd_tconn References held by: * Each (running) drbd thread has a reference on tconn * Each mdev has a reference on tconn * Being in the all_tconn list counts for one reference * Each after_conn_state_chg_work has a reference to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +++-- drivers/block/drbd/drbd_main.c | 20 +++++++++++++++++--- drivers/block/drbd/drbd_nl.c | 22 +++++++++++++--------- drivers/block/drbd/drbd_state.c | 2 ++ 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0fb3fc32a99..3abf982ec55 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -828,6 +828,7 @@ enum { struct drbd_tconn { /*
is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* linked on global drbd_tconns */ + struct kref kref; struct idr volumes; /* to mdev mapping */ enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ unsigned susp:1; /* IO suspended by user */ @@ -1378,8 +1379,8 @@ extern int conn_lowest_minor(struct drbd_tconn *tconn); enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_delete_device(struct drbd_conf *mdev); -struct drbd_tconn *drbd_new_tconn(const char *name); -extern void drbd_free_tconn(struct drbd_tconn *tconn); +struct drbd_tconn *conn_create(const char *name); +extern void conn_destroy(struct kref *kref); struct drbd_tconn *conn_by_name(const char *name); extern void conn_free_crypto(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b9c103f16ae..11427f59c5a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -509,6 +509,8 @@ restart: conn_info(tconn, "Terminating %s\n", current->comm); /* Release mod reference taken when thread was started */ + + kref_put(&tconn->kref, &conn_destroy); module_put(THIS_MODULE); return retval; } @@ -546,6 +548,8 @@ int drbd_thread_start(struct drbd_thread *thi) return false; } + kref_get(&thi->tconn->kref); + init_completion(&thi->stop); thi->reset_cpu_mask = 1; thi->t_state = RUNNING; @@ -558,6 +562,7 @@ int drbd_thread_start(struct drbd_thread *thi) if (IS_ERR(nt)) { conn_err(tconn, "Couldn't start thread\n"); + kref_put(&tconn->kref, &conn_destroy); module_put(THIS_MODULE); return false; } @@ -2237,6 +2242,8 @@ static void drbd_release_all_peer_reqs(struct drbd_conf *mdev) /* caution. no locking. 
*/ void drbd_delete_device(struct drbd_conf *mdev) { + struct drbd_tconn *tconn = mdev->tconn; + idr_remove(&mdev->tconn->volumes, mdev->vnr); idr_remove(&minors, mdev_to_minor(mdev)); synchronize_rcu(); @@ -2272,6 +2279,8 @@ void drbd_delete_device(struct drbd_conf *mdev) put_disk(mdev->vdisk); blk_cleanup_queue(mdev->rq_queue); kfree(mdev); + + kref_put(&tconn->kref, &conn_destroy); } static void drbd_cleanup(void) @@ -2409,7 +2418,7 @@ void conn_free_crypto(struct drbd_tconn *tconn) tconn->int_dig_vv = NULL; } -struct drbd_tconn *drbd_new_tconn(const char *name) +struct drbd_tconn *conn_create(const char *name) { struct drbd_tconn *tconn; @@ -2455,6 +2464,7 @@ struct drbd_tconn *drbd_new_tconn(const char *name) }; down_write(&drbd_cfg_rwsem); + kref_init(&tconn->kref); list_add_tail(&tconn->all_tconn, &drbd_tconns); up_write(&drbd_cfg_rwsem); @@ -2471,9 +2481,10 @@ fail: return NULL; } -void drbd_free_tconn(struct drbd_tconn *tconn) +void conn_destroy(struct kref *kref) { - list_del(&tconn->all_tconn); + struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref); + idr_destroy(&tconn->volumes); free_cpumask_var(tconn->cpu_mask); @@ -2503,7 +2514,9 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, if (!mdev) return ERR_NOMEM; + kref_get(&tconn->kref); mdev->tconn = tconn; + mdev->minor = minor; mdev->vnr = vnr; @@ -2605,6 +2618,7 @@ out_no_disk: blk_cleanup_queue(q); out_no_q: kfree(mdev); + kref_put(&tconn->kref, &conn_destroy); return err; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fff11ae79f1..23c34baa75a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -479,6 +479,7 @@ static int _try_outdate_peer_async(void *data) conn_try_outdate_peer(tconn); + kref_put(&tconn->kref, &conn_destroy); return 0; } @@ -486,9 +487,12 @@ void conn_try_outdate_peer_async(struct drbd_tconn *tconn) { struct task_struct *opa; + kref_get(&tconn->kref); opa = 
kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h"); - if (IS_ERR(opa)) + if (IS_ERR(opa)) { conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n"); + kref_put(&tconn->kref, &conn_destroy); + } } enum drbd_state_rv @@ -2627,7 +2631,7 @@ int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) * on each iteration. */ - /* synchronize with drbd_new_tconn/drbd_free_tconn */ + /* synchronize with conn_create()/conn_destroy() */ down_read(&drbd_cfg_rwsem); /* revalidate iterator position */ list_for_each_entry(tmp, &drbd_tconns, all_tconn) { @@ -2932,7 +2936,7 @@ int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!drbd_new_tconn(adm_ctx.conn_name)) + if (!conn_create(adm_ctx.conn_name)) retcode = ERR_NOMEM; out: drbd_adm_finish(info, retcode); @@ -3005,10 +3009,6 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) down_write(&drbd_cfg_rwsem); retcode = adm_delete_minor(adm_ctx.mdev); up_write(&drbd_cfg_rwsem); - /* if this was the last volume of this connection, - * this will terminate all threads */ - if (retcode == NO_ERROR) - conn_reconfig_done(adm_ctx.tconn); out: drbd_adm_finish(info, retcode); return 0; @@ -3078,7 +3078,9 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* delete connection */ if (conn_lowest_minor(adm_ctx.tconn) < 0) { - drbd_free_tconn(adm_ctx.tconn); + list_del(&adm_ctx.tconn->all_tconn); + kref_put(&adm_ctx.tconn->kref, &conn_destroy); + retcode = NO_ERROR; } else { /* "can not happen" */ @@ -3107,7 +3109,9 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) down_write(&drbd_cfg_rwsem); if (conn_lowest_minor(adm_ctx.tconn) < 0) { - drbd_free_tconn(adm_ctx.tconn); + list_del(&adm_ctx.tconn->all_tconn); + kref_put(&adm_ctx.tconn->kref, &conn_destroy); + retcode = NO_ERROR; } else { retcode = ERR_CONN_IN_USE; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 
f20a4a3807e..d7a330e0135 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1460,6 +1460,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); after_all_state_ch(tconn); + kref_put(&tconn->kref, &conn_destroy); return 0; } @@ -1686,6 +1687,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ acscw->ns_max = ns_max; acscw->flags = flags; acscw->w.cb = w_after_conn_state_ch; + kref_get(&tconn->kref); acscw->w.tconn = tconn; drbd_queue_work(&tconn->data.work, &acscw->w); } else { From 0ace9dfabec3c1e96a1cd9fe0791ecbe6737c2f9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Sun, 24 Apr 2011 10:53:19 +0200 Subject: [PATCH 336/609] drbd: Take a reference on tconn when finding a tconn by name Rule #3 of kref.txt Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 6 ++++-- drivers/block/drbd/drbd_nl.c | 15 +++++++++++---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3abf982ec55..7797879d326 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1381,7 +1381,7 @@ extern void drbd_delete_device(struct drbd_conf *mdev); struct drbd_tconn *conn_create(const char *name); extern void conn_destroy(struct kref *kref); -struct drbd_tconn *conn_by_name(const char *name); +struct drbd_tconn *conn_get_by_name(const char *name); extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 11427f59c5a..f0a0e1759ba 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2362,7 +2362,7 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq) INIT_LIST_HEAD(&wq->q); } -struct drbd_tconn *conn_by_name(const char *name) 
+struct drbd_tconn *conn_get_by_name(const char *name) { struct drbd_tconn *tconn; @@ -2371,8 +2371,10 @@ struct drbd_tconn *conn_by_name(const char *name) down_read(&drbd_cfg_rwsem); list_for_each_entry(tconn, &drbd_tconns, all_tconn) { - if (!strcmp(tconn->name, name)) + if (!strcmp(tconn->name, name)) { + kref_get(&tconn->kref); goto found; + } } tconn = NULL; found: diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 23c34baa75a..272c4a08ee4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -195,7 +195,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, adm_ctx.minor = d_in->minor; adm_ctx.mdev = minor_to_mdev(d_in->minor); - adm_ctx.tconn = conn_by_name(adm_ctx.conn_name); + adm_ctx.tconn = conn_get_by_name(adm_ctx.conn_name); if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { drbd_msg_put_info("unknown minor"); @@ -223,8 +223,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, drbd_msg_put_info("minor exists as different volume"); return ERR_INVALID_REQUEST; } - if (adm_ctx.mdev && !adm_ctx.tconn) - adm_ctx.tconn = adm_ctx.mdev->tconn; + return NO_ERROR; fail: @@ -238,6 +237,11 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) struct nlattr *nla; const char *conn_name = NULL; + if (adm_ctx.tconn) { + kref_put(&adm_ctx.tconn->kref, &conn_destroy); + adm_ctx.tconn = NULL; + } + if (!adm_ctx.reply_skb) return -ENOMEM; @@ -2748,10 +2752,13 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) if (!nla) return -EINVAL; conn_name = nla_data(nla); - tconn = conn_by_name(conn_name); + tconn = conn_get_by_name(conn_name); + if (!tconn) return -ENODEV; + kref_put(&tconn->kref, &conn_destroy); /* get_one_status() (re)validates tconn by itself */ + /* prime iterators, and set "filter" mode mark: * only dump this tconn. 
*/ cb->args[0] = (long)tconn; From a18e9d1eb0660621eb9911e59a9b4d664cbad4d9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Sun, 24 Apr 2011 11:09:55 +0200 Subject: [PATCH 337/609] drbd: Removed the OBJECT_DYING and the CONFIG_PENDING bits superseded by refcounting Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 ------ drivers/block/drbd/drbd_nl.c | 20 +++----------------- drivers/block/drbd/drbd_state.c | 16 +--------------- drivers/block/drbd/drbd_worker.c | 3 --- 4 files changed, 4 insertions(+), 41 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7797879d326..28e7ecc9188 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -816,12 +816,6 @@ enum { GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ CONN_WD_ST_CHG_OKAY, CONN_WD_ST_CHG_FAIL, - CONFIG_PENDING, /* serialization of (re)configuration requests. - * if set, also prevents the device from dying */ - OBJECT_DYING, /* device became unconfigured, - * but worker thread is still handling the cleanup. - * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, - * while this is set. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ }; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 272c4a08ee4..caaef71ec61 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1040,34 +1040,20 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) drbd_setup_queue_param(mdev, new); } -/* serialize deconfig (worker exiting, doing cleanup) - * and reconfig (drbdsetup disk, drbdsetup net) - * - * Wait for a potentially exiting worker, then restart it, - * or start a new one. Flush any pending work, there may still be an - * after_state_change queued. 
- */ +/* Starts the worker thread */ static void conn_reconfig_start(struct drbd_tconn *tconn) { - wait_event(tconn->ping_wait, !test_and_set_bit(CONFIG_PENDING, &tconn->flags)); - wait_event(tconn->ping_wait, !test_bit(OBJECT_DYING, &tconn->flags)); drbd_thread_start(&tconn->worker); conn_flush_workqueue(tconn); } -/* if still unconfigured, stops worker again. - * if configured now, clears CONFIG_PENDING. - * wakes potential waiters */ +/* if still unconfigured, stops worker again. */ static void conn_reconfig_done(struct drbd_tconn *tconn) { spin_lock_irq(&tconn->req_lock); - if (conn_all_vols_unconf(tconn)) { - set_bit(OBJECT_DYING, &tconn->flags); + if (conn_all_vols_unconf(tconn)) drbd_thread_stop_nowait(&tconn->worker); - } else - clear_bit(CONFIG_PENDING, &tconn->flags); spin_unlock_irq(&tconn->req_lock); - wake_up(&tconn->ping_wait); } /* Make sure IO is suspended before calling this function(). */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d7a330e0135..05628b45cf0 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -926,18 +926,6 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, mdev->tconn->susp_nod = ns.susp_nod; mdev->tconn->susp_fen = ns.susp_fen; - /* solve the race between becoming unconfigured, - * worker doing the cleanup, and - * admin reconfiguring us: - * on (re)configure, first set CONFIG_PENDING, - * then wait for a potentially exiting worker, - * start the worker, and schedule one no_op. - * then proceed with configuration. 
- */ - if(conn_all_vols_unconf(mdev->tconn) && - !test_and_set_bit(CONFIG_PENDING, &mdev->tconn->flags)) - set_bit(OBJECT_DYING, &mdev->tconn->flags); - if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) drbd_print_uuids(mdev, "attached to UUIDs"); @@ -1401,10 +1389,8 @@ struct after_conn_state_chg_work { static void after_all_state_ch(struct drbd_tconn *tconn) { - if (conn_all_vols_unconf(tconn) && - test_bit(OBJECT_DYING, &tconn->flags)) { + if (conn_all_vols_unconf(tconn)) drbd_thread_stop_nowait(&tconn->worker); - } } static int w_after_conn_state_ch(struct drbd_work *w, int unused) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9d1ba8ea681..7a73bd4287c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1754,9 +1754,6 @@ int drbd_worker(struct drbd_thread *thi) drbd_mdev_cleanup(mdev); } up_read(&drbd_cfg_rwsem); - clear_bit(OBJECT_DYING, &tconn->flags); - clear_bit(CONFIG_PENDING, &tconn->flags); - wake_up(&tconn->ping_wait); return 0; } From 5ee743e92d3b170a67b58c0c6ea169300983f58e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 26 Apr 2011 16:22:25 +0200 Subject: [PATCH 338/609] drbd: remove useless kobject_uevent from drbd_adm_connect Calling kobject_uevent, which may sleep, from within rcu_read_lock() protected regions is not possible. This particular kobject_uevent is also wrong. It was supposed to trigger a udev run, just in case something relevant to udev symlink magic has changed, when adjusting runtime re-configurable settings while we still had the "syncer conf". It was improperly placed in connect when we dropped the "syncer conf". The right thing to do is probably to call "udevadm trigger" directly in those cases where drbdadm thinks there was a need to trigger extra udev runs.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index caaef71ec61..1a8fb7a0db5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2043,15 +2043,15 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&tconn->net_conf_update); - retcode = conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); - rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, i) { mdev->send_cnt = 0; mdev->recv_cnt = 0; - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); } rcu_read_unlock(); + + retcode = conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + conn_reconfig_done(tconn); drbd_adm_finish(info, retcode); return 0; From d9cc6e231897a9ab1a94e6cfe12d71bfec0b7a81 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Apr 2011 10:25:28 +0200 Subject: [PATCH 339/609] drbd: fix various disconnecting races If an admin requests disconnect at a time when the state handling already disconnects/reconnects, there have been some races. Make sure to always really stop the network threads before returning success for disconnect. Do not pretend successful forced disconnect, if the state handling returned an error. Return success from drbd_adm_down() only after all threads are finished.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 21 ++++++++++++--------- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_state.c | 20 +++++++++++++------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1a8fb7a0db5..a16089ce0a5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2075,10 +2075,9 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for enum drbd_state_rv rv; if (force) { spin_lock_irq(&tconn->req_lock); - if (tconn->cstate >= C_WF_CONNECTION) - _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + rv = _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); spin_unlock_irq(&tconn->req_lock); - return SS_SUCCESS; + return rv; } rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); @@ -2137,10 +2136,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) if (rv < SS_SUCCESS) goto fail; + /* No one else can reconfigure the network while I am here. + * The state handling only uses drbd_thread_stop_nowait(), + * we want to really wait here until the receiver is no more. */ + drbd_thread_stop(&tconn->receiver); if (wait_event_interruptible(tconn->ping_wait, - tconn->cstate != C_DISCONNECTING)) { - /* Do not test for mdev->state.conn == C_STANDALONE, since - someone else might connect us in the mean time! */ + tconn->cstate == C_STANDALONE)) { retcode = ERR_INTR; goto fail; } @@ -3043,6 +3044,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } + /* Make sure the network threads have actually stopped, + * state handling only does drbd_thread_stop_nowait(). 
*/ + drbd_thread_stop(&adm_ctx.tconn->receiver); + /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { rv = adm_detach(mdev); @@ -3066,11 +3071,9 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) } } - /* stop all threads */ - conn_reconfig_done(adm_ctx.tconn); - /* delete connection */ if (conn_lowest_minor(adm_ctx.tconn) < 0) { + drbd_thread_stop(&adm_ctx.tconn->worker); list_del(&adm_ctx.tconn->all_tconn); kref_put(&adm_ctx.tconn->kref, &conn_destroy); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4665ad79b4a..6da7aebde8d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4226,7 +4226,7 @@ static void drbd_disconnect(struct drbd_tconn *tconn) synchronize_rcu(); kfree(old_conf); - conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE); + conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); } } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 05628b45cf0..8b0f31b6808 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -604,21 +604,27 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) static enum drbd_state_rv is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc) { - enum drbd_state_rv rv = SS_SUCCESS; + /* no change -> nothing to do, at least for the connection part */ + if (oc == nc) + return SS_NOTHING_TO_DO; - /* Disallow Network errors to configure a device's network part */ - if ((nc >= C_TIMEOUT && nc <= C_TEAR_DOWN) && oc <= C_DISCONNECTING) - rv = SS_NEED_CONNECTION; + /* disconnect of an unconfigured connection does not make sense */ + if (oc == C_STANDALONE && nc == C_DISCONNECTING) + return SS_ALREADY_STANDALONE; + + /* from C_STANDALONE, we start with C_UNCONNECTED */ + if (oc == C_STANDALONE && nc != C_UNCONNECTED) + return SS_NEED_CONNECTION; /* After a network error only C_UNCONNECTED or C_DISCONNECTING may 
follow. */ if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING) - rv = SS_IN_TRANSIENT_STATE; + return SS_IN_TRANSIENT_STATE; /* After C_DISCONNECTING only C_STANDALONE may follow */ if (oc == C_DISCONNECTING && nc != C_STANDALONE) - rv = SS_IN_TRANSIENT_STATE; + return SS_IN_TRANSIENT_STATE; - return rv; + return SS_SUCCESS; } From 0fd0ea064c30022577f62d9060889d964123be6f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Apr 2011 11:27:47 +0200 Subject: [PATCH 340/609] drbd: Consider all crypto options in connect and in net-options So far this was simply not considered after the options have been re-arranged. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 230 ++++++++++++++++++----------------- 1 file changed, 121 insertions(+), 109 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a16089ce0a5..f85cef44838 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1727,6 +1727,88 @@ check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) return rv; } +struct crypto { + struct crypto_hash *verify_tfm; + struct crypto_hash *csums_tfm; + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_w_tfm; + struct crypto_hash *integrity_r_tfm; + void *int_dig_in; + void *int_dig_vv; +}; + +static int +alloc_tfm(struct crypto_hash **tfm, char *tfm_name, int err_alg, int err_nd) +{ + if (!tfm_name[0]) + return NO_ERROR; + + *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*tfm)) { + *tfm = NULL; + return err_alg; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(*tfm))) + return err_nd; + + return NO_ERROR; +} + +static enum drbd_ret_code +alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) +{ + char hmac_name[CRYPTO_MAX_ALG_NAME]; + enum drbd_ret_code rv; + int hash_size; + + rv = alloc_tfm(&crypto->csums_tfm, new_conf->csums_alg, + ERR_CSUMS_ALG,
ERR_CSUMS_ALG_ND); + if (rv != NO_ERROR) + return rv; + rv = alloc_tfm(&crypto->verify_tfm, new_conf->verify_alg, + ERR_VERIFY_ALG, ERR_VERIFY_ALG_ND); + if (rv != NO_ERROR) + return rv; + rv = alloc_tfm(&crypto->integrity_w_tfm, new_conf->integrity_alg, + ERR_INTEGRITY_ALG, ERR_INTEGRITY_ALG_ND); + if (rv != NO_ERROR) + return rv; + rv = alloc_tfm(&crypto->integrity_r_tfm, new_conf->integrity_alg, + ERR_INTEGRITY_ALG, ERR_INTEGRITY_ALG_ND); + if (rv != NO_ERROR) + return rv; + if (new_conf->cram_hmac_alg[0] != 0) { + snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", + new_conf->cram_hmac_alg); + + rv = alloc_tfm(&crypto->cram_hmac_tfm, hmac_name, + ERR_AUTH_ALG, ERR_AUTH_ALG_ND); + } + if (crypto->integrity_w_tfm) { + hash_size = crypto_hash_digestsize(crypto->integrity_w_tfm); + crypto->int_dig_in = kmalloc(hash_size, GFP_KERNEL); + if (!crypto->int_dig_in) + return ERR_NOMEM; + crypto->int_dig_vv = kmalloc(hash_size, GFP_KERNEL); + if (!crypto->int_dig_vv) + return ERR_NOMEM; + } + + return rv; +} + +static void free_crypto(struct crypto *crypto) +{ + kfree(crypto->int_dig_in); + kfree(crypto->int_dig_vv); + crypto_free_hash(crypto->cram_hmac_tfm); + crypto_free_hash(crypto->integrity_w_tfm); + crypto_free_hash(crypto->integrity_r_tfm); + crypto_free_hash(crypto->csums_tfm); + crypto_free_hash(crypto->verify_tfm); +} + int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -1735,9 +1817,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ - struct crypto_hash *verify_tfm = NULL; - struct crypto_hash *csums_tfm = NULL; - + struct crypto crypto = { }; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); if (!adm_ctx.reply_skb) @@ -1779,61 +1859,49 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) /* re-sync running */ rsr = conn_resync_running(tconn); - if (rsr && old_conf && 
strcmp(new_conf->csums_alg, old_conf->csums_alg)) { + if (rsr && strcmp(new_conf->csums_alg, old_conf->csums_alg)) { retcode = ERR_CSUMS_RESYNC_RUNNING; goto fail; } - if (!rsr && new_conf->csums_alg[0]) { - csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(csums_tfm)) { - csums_tfm = NULL; - retcode = ERR_CSUMS_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { - retcode = ERR_CSUMS_ALG_ND; - goto fail; - } - } - /* online verify running */ ovr = conn_ov_running(tconn); - if (ovr) { - if (strcmp(new_conf->verify_alg, old_conf->verify_alg)) { - retcode = ERR_VERIFY_RUNNING; - goto fail; - } + if (ovr && strcmp(new_conf->verify_alg, old_conf->verify_alg)) { + retcode = ERR_VERIFY_RUNNING; + goto fail; } - if (!ovr && new_conf->verify_alg[0]) { - verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(verify_tfm)) { - verify_tfm = NULL; - retcode = ERR_VERIFY_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { - retcode = ERR_VERIFY_ALG_ND; - goto fail; - } - } + retcode = alloc_crypto(&crypto, new_conf); + if (retcode != NO_ERROR) + goto fail; rcu_assign_pointer(tconn->net_conf, new_conf); if (!rsr) { crypto_free_hash(tconn->csums_tfm); - tconn->csums_tfm = csums_tfm; - csums_tfm = NULL; + tconn->csums_tfm = crypto.csums_tfm; + crypto.csums_tfm = NULL; } if (!ovr) { crypto_free_hash(tconn->verify_tfm); - tconn->verify_tfm = verify_tfm; - verify_tfm = NULL; + tconn->verify_tfm = crypto.verify_tfm; + crypto.verify_tfm = NULL; } + /* FIXME can not assign these so bluntly while we have ongoing IO */ + kfree(tconn->int_dig_in); + tconn->int_dig_in = crypto.int_dig_in; + kfree(tconn->int_dig_vv); + tconn->int_dig_vv = crypto.int_dig_vv; + crypto_free_hash(tconn->integrity_w_tfm); + tconn->integrity_w_tfm = crypto.integrity_w_tfm; + crypto_free_hash(tconn->integrity_r_tfm); + tconn->integrity_r_tfm = crypto.integrity_r_tfm; + + /* FIXME Changing 
cram_hmac while the connection is established is useless */ + crypto_free_hash(tconn->cram_hmac_tfm); + tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; + mutex_unlock(&tconn->net_conf_update); synchronize_rcu(); kfree(old_conf); @@ -1845,8 +1913,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) fail: mutex_unlock(&tconn->net_conf_update); - crypto_free_hash(csums_tfm); - crypto_free_hash(verify_tfm); + free_crypto(&crypto); kfree(new_conf); done: conn_reconfig_done(tconn); @@ -1857,14 +1924,9 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { - char hmac_name[CRYPTO_MAX_ALG_NAME]; struct drbd_conf *mdev; struct net_conf *old_conf, *new_conf = NULL; - struct crypto_hash *tfm = NULL; - struct crypto_hash *integrity_w_tfm = NULL; - struct crypto_hash *integrity_r_tfm = NULL; - void *int_dig_in = NULL; - void *int_dig_vv = NULL; + struct crypto crypto = { }; struct drbd_tconn *oconn; struct drbd_tconn *tconn; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; @@ -1969,60 +2031,12 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } - if (new_conf->cram_hmac_alg[0] != 0) { - snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", - new_conf->cram_hmac_alg); - tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) { - tfm = NULL; - retcode = ERR_AUTH_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) { - retcode = ERR_AUTH_ALG_ND; - goto fail; - } - } - - if (new_conf->integrity_alg[0]) { - integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(integrity_w_tfm)) { - integrity_w_tfm = NULL; - retcode=ERR_INTEGRITY_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) { - retcode=ERR_INTEGRITY_ALG_ND; - goto fail; - } - - integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); - if 
(IS_ERR(integrity_r_tfm)) { - integrity_r_tfm = NULL; - retcode=ERR_INTEGRITY_ALG; - goto fail; - } - } + retcode = alloc_crypto(&crypto, new_conf); + if (retcode != NO_ERROR) + goto fail; ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; - /* allocation not in the IO path, cqueue thread context */ - if (integrity_w_tfm) { - i = crypto_hash_digestsize(integrity_w_tfm); - int_dig_in = kmalloc(i, GFP_KERNEL); - if (!int_dig_in) { - retcode = ERR_NOMEM; - goto fail; - } - int_dig_vv = kmalloc(i, GFP_KERNEL); - if (!int_dig_vv) { - retcode = ERR_NOMEM; - goto fail; - } - } - conn_flush_workqueue(tconn); mutex_lock(&tconn->net_conf_update); @@ -2035,11 +2049,13 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(tconn->net_conf, new_conf); conn_free_crypto(tconn); - tconn->cram_hmac_tfm = tfm; - tconn->integrity_w_tfm = integrity_w_tfm; - tconn->integrity_r_tfm = integrity_r_tfm; - tconn->int_dig_in = int_dig_in; - tconn->int_dig_vv = int_dig_vv; + tconn->int_dig_in = crypto.int_dig_in; + tconn->int_dig_vv = crypto.int_dig_vv; + tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; + tconn->integrity_w_tfm = crypto.integrity_w_tfm; + tconn->integrity_r_tfm = crypto.integrity_r_tfm; + tconn->csums_tfm = crypto.csums_tfm; + tconn->verify_tfm = crypto.verify_tfm; mutex_unlock(&tconn->net_conf_update); @@ -2057,11 +2073,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) return 0; fail: - kfree(int_dig_in); - kfree(int_dig_vv); - crypto_free_hash(tfm); - crypto_free_hash(integrity_w_tfm); - crypto_free_hash(integrity_r_tfm); + free_crypto(&crypto); kfree(new_conf); conn_reconfig_done(tconn); From 5979e36155f7875f0544f3c696f8ce863417cc68 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Apr 2011 21:09:55 +0200 Subject: [PATCH 341/609] drbd: on reconfiguration requests, mind the SET_DEFAULTS flag The DRBD_GENL_F_SET_DEFAULTS flag was ignored for drbd_adm_disk_opts() and drbd_adm_net_opts(). 
Factor out drbd_set_*_defaults() helper functions, and call them appropriately. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 5 +- drivers/block/drbd/drbd_nl.c | 150 +++++++++++++++++++-------------- 3 files changed, 91 insertions(+), 65 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 28e7ecc9188..e90bc9f93de 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1389,6 +1389,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ +extern void drbd_set_res_opts_default(struct res_opts *r); extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f0a0e1759ba..7e56f88c0b6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2460,10 +2460,7 @@ struct drbd_tconn *conn_create(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); - tconn->res_opts = (struct res_opts) { - {}, 0, /* cpu_mask */ - DRBD_ON_NO_DATA_DEF, /* on_no_data */ - }; + drbd_set_res_opts_default(&tconn->res_opts); down_write(&drbd_cfg_rwsem); kref_init(&tconn->kref); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f85cef44838..ac41aca72cb 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1077,6 +1077,84 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } + +static bool should_set_defaults(struct genl_info *info) +{ + unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags; + return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); +} + +/* Maybe we should we generate these functions + * from the drbd_genl.h 
magic as well? + * That way we would not "accidentally forget" to add defaults here. */ + +#define RESET_ARRAY_FIELD(field) do { \ + memset(field, 0, sizeof(field)); \ + field ## _len = 0; \ +} while (0) +void drbd_set_res_opts_default(struct res_opts *r) +{ + RESET_ARRAY_FIELD(r->cpu_mask); + r->on_no_data = DRBD_ON_NO_DATA_DEF; +} + +static void drbd_set_net_conf_defaults(struct net_conf *nc) +{ + /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT + * in drbd_genl.h, they can only be change with disconnect/reconnect */ + RESET_ARRAY_FIELD(nc->shared_secret); + + RESET_ARRAY_FIELD(nc->cram_hmac_alg); + RESET_ARRAY_FIELD(nc->integrity_alg); + RESET_ARRAY_FIELD(nc->verify_alg); + RESET_ARRAY_FIELD(nc->csums_alg); +#undef RESET_ARRAY_FIELD + + nc->wire_protocol = DRBD_PROTOCOL_DEF; + nc->try_connect_int = DRBD_CONNECT_INT_DEF; + nc->timeout = DRBD_TIMEOUT_DEF; + nc->ping_int = DRBD_PING_INT_DEF; + nc->ping_timeo = DRBD_PING_TIMEO_DEF; + nc->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; + nc->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; + nc->ko_count = DRBD_KO_COUNT_DEF; + nc->max_buffers = DRBD_MAX_BUFFERS_DEF; + nc->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; + nc->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; + nc->after_sb_0p = DRBD_AFTER_SB_0P_DEF; + nc->after_sb_1p = DRBD_AFTER_SB_1P_DEF; + nc->after_sb_2p = DRBD_AFTER_SB_2P_DEF; + nc->rr_conflict = DRBD_RR_CONFLICT_DEF; + nc->on_congestion = DRBD_ON_CONGESTION_DEF; + nc->cong_fill = DRBD_CONG_FILL_DEF; + nc->cong_extents = DRBD_CONG_EXTENTS_DEF; + nc->two_primaries = 0; + nc->no_cork = 0; + nc->always_asbp = 0; + nc->use_rle = 0; +} + +static void drbd_set_disk_conf_defaults(struct disk_conf *dc) +{ + /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT + * in drbd_genl.h, they can only be change with detach/reattach */ + dc->on_io_error = DRBD_ON_IO_ERROR_DEF; + dc->fencing = DRBD_FENCING_DEF; + dc->resync_rate = DRBD_RATE_DEF; + dc->resync_after = DRBD_AFTER_DEF; + dc->al_extents = DRBD_AL_EXTENTS_DEF; + 
dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; + dc->c_delay_target = DRBD_C_DELAY_TARGET_DEF; + dc->c_fill_target = DRBD_C_FILL_TARGET_DEF; + dc->c_max_rate = DRBD_C_MAX_RATE_DEF; + dc->c_min_rate = DRBD_C_MIN_RATE_DEF; + dc->no_disk_barrier = 0; + dc->no_disk_flush = 0; + dc->no_disk_drain = 0; + dc->no_md_flush = 0; +} + + int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -1113,6 +1191,9 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc)); + if (should_set_defaults(info)) + drbd_set_disk_conf_defaults(ndc); + err = disk_conf_from_attrs_for_change(ndc, info); if (err) { retcode = ERR_MANDATORY_TAG; @@ -1228,27 +1309,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - nbc->dc = (struct disk_conf) { - {}, 0, /* backing_dev */ - {}, 0, /* meta_dev */ - 0, /* meta_dev_idx */ - DRBD_DISK_SIZE_SECT_DEF, /* disk_size */ - DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */ - DRBD_ON_IO_ERROR_DEF, /* on_io_error */ - DRBD_FENCING_DEF, /* fencing */ - DRBD_RATE_DEF, /* resync_rate */ - DRBD_AFTER_DEF, /* resync_after */ - DRBD_AL_EXTENTS_DEF, /* al_extents */ - DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */ - DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */ - DRBD_C_FILL_TARGET_DEF, /* c_fill_target */ - DRBD_C_MAX_RATE_DEF, /* c_max_rate */ - DRBD_C_MIN_RATE_DEF, /* c_min_rate */ - 0, /* no_disk_barrier */ - 0, /* no_disk_flush */ - 0, /* no_disk_drain */ - 0, /* no_md_flush */ - }; + drbd_set_disk_conf_defaults(&nbc->dc); err = disk_conf_from_attrs(&nbc->dc, info); if (err) { @@ -1845,6 +1906,8 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) } *new_conf = *old_conf; + if (should_set_defaults(info)) + drbd_set_net_conf_defaults(new_conf); err = net_conf_from_attrs_for_change(new_conf, info); if (err) { @@ -1949,45 +2012,13 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } /* allocation not in the IO 
path, cqueue thread context */ - new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); + new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; goto fail; } - *new_conf = (struct net_conf) { - {}, 0, /* my_addr */ - {}, 0, /* peer_addr */ - {}, 0, /* shared_secret */ - {}, 0, /* cram_hmac_alg */ - {}, 0, /* integrity_alg */ - {}, 0, /* verify_alg */ - {}, 0, /* csums_alg */ - DRBD_PROTOCOL_DEF, /* wire_protocol */ - DRBD_CONNECT_INT_DEF, /* try_connect_int */ - DRBD_TIMEOUT_DEF, /* timeout */ - DRBD_PING_INT_DEF, /* ping_int */ - DRBD_PING_TIMEO_DEF, /* ping_timeo */ - DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */ - DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */ - DRBD_KO_COUNT_DEF, /* ko_count */ - DRBD_MAX_BUFFERS_DEF, /* max_buffers */ - DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */ - DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */ - DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */ - DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */ - DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */ - DRBD_RR_CONFLICT_DEF, /* rr_conflict */ - DRBD_ON_CONGESTION_DEF, /* on_congestion */ - DRBD_CONG_FILL_DEF, /* cong_fill */ - DRBD_CONG_EXTENTS_DEF, /* cong_extents */ - 0, /* two_primaries */ - 0, /* want_lose */ - 0, /* no_cork */ - 0, /* always_asbp */ - 0, /* dry_run */ - 0, /* use_rle */ - }; + drbd_set_net_conf_defaults(new_conf); err = net_conf_from_attrs(new_conf, info); if (err) { @@ -2275,12 +2306,9 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) goto fail; } - if (((struct drbd_genlmsghdr*)info->userhdr)->flags - & DRBD_GENL_F_SET_DEFAULTS) { - memset(&sc, 0, sizeof(struct res_opts)); - sc.on_no_data = DRBD_ON_NO_DATA_DEF; - } else - sc = tconn->res_opts; + sc = tconn->res_opts; + if (should_set_defaults(info)) + drbd_set_res_opts_default(&sc); err = res_opts_from_attrs(&sc, info); if (err) { From 5ecc72c3b96f30e9f735afb9b1898ea407ae1dce Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Apr 2011 21:14:57 +0200 Subject: [PATCH 342/609] 
drbd: rename variable ndc to new_disk_conf Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ac41aca72cb..3f70109b65f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1159,7 +1159,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; struct drbd_conf *mdev; - struct disk_conf *ndc; /* new disk conf */ + struct disk_conf *new_disk_conf; int err, fifo_size; int *rs_plan_s = NULL; @@ -1184,39 +1184,39 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) * some half-updated struct when we * assign it later. */ - ndc = kmalloc(sizeof(*ndc), GFP_KERNEL); - if (!ndc) { + new_disk_conf = kmalloc(sizeof(*new_disk_conf), GFP_KERNEL); + if (!new_disk_conf) { retcode = ERR_NOMEM; goto fail; } - memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc)); + memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf)); if (should_set_defaults(info)) - drbd_set_disk_conf_defaults(ndc); + drbd_set_disk_conf_defaults(new_disk_conf); - err = disk_conf_from_attrs_for_change(ndc, info); + err = disk_conf_from_attrs_for_change(new_disk_conf, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); } - if (!expect(ndc->resync_rate >= 1)) - ndc->resync_rate = 1; + if (!expect(new_disk_conf->resync_rate >= 1)) + new_disk_conf->resync_rate = 1; /* clip to allowed range */ - if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN)) - ndc->al_extents = DRBD_AL_EXTENTS_MIN; - if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX)) - ndc->al_extents = DRBD_AL_EXTENTS_MAX; + if (!expect(new_disk_conf->al_extents >= DRBD_AL_EXTENTS_MIN)) + new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(new_disk_conf->al_extents <= DRBD_AL_EXTENTS_MAX)) + 
new_disk_conf->al_extents = DRBD_AL_EXTENTS_MAX; /* most sanity checks done, try to assign the new sync-after * dependency. need to hold the global lock in there, * to avoid a race in the dependency loop check. */ - retcode = drbd_alter_sa(mdev, ndc->resync_after); + retcode = drbd_alter_sa(mdev, new_disk_conf->resync_after); if (retcode != NO_ERROR) goto fail; - fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; + fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); if (!rs_plan_s) { @@ -1236,7 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); drbd_al_shrink(mdev); - err = drbd_check_al_size(mdev, ndc); + err = drbd_check_al_size(mdev, new_disk_conf); lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); @@ -1249,7 +1249,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) * To avoid someone looking at a half-updated struct, we probably * should have a rw-semaphor on net_conf and disk_conf. */ - mdev->ldev->dc = *ndc; + mdev->ldev->dc = *new_disk_conf; drbd_md_sync(mdev); @@ -1259,7 +1259,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) fail: put_ldev(mdev); - kfree(ndc); + kfree(new_disk_conf); kfree(rs_plan_s); out: drbd_adm_finish(info, retcode); From b57a1e27ee3efc3f5b2c7a474ce40e709e6b08cb Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Apr 2011 21:17:33 +0200 Subject: [PATCH 343/609] drbd: rename variable sc to res_opts sc was short for syncer conf, which does not exist anymore anyways. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3f70109b65f..ddc9de3dae9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2290,7 +2290,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) cpumask_var_t new_cpu_mask; struct drbd_tconn *tconn; int *rs_plan_s = NULL; - struct res_opts sc; + struct res_opts res_opts; int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); @@ -2306,11 +2306,11 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) goto fail; } - sc = tconn->res_opts; + res_opts = tconn->res_opts; if (should_set_defaults(info)) - drbd_set_res_opts_default(&sc); + drbd_set_res_opts_default(&res_opts); - err = res_opts_from_attrs(&sc, info); + err = res_opts_from_attrs(&res_opts, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -2318,8 +2318,8 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) } /* silently ignore cpu mask on UP kernel */ - if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { - err = __bitmap_parse(sc.cpu_mask, 32, 0, + if (nr_cpu_ids > 1 && res_opts.cpu_mask[0] != 0) { + err = __bitmap_parse(res_opts.cpu_mask, 32, 0, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); @@ -2329,7 +2329,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) } - tconn->res_opts = sc; + tconn->res_opts = res_opts; if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { cpumask_copy(tconn->cpu_mask, new_cpu_mask); From acb104c396f915a46a0ff5e0bd588764fcbbf1ab Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 28 Apr 2011 07:58:24 +0200 Subject: [PATCH 344/609] drbd: fix copy/paste error in comment Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 50b851e389e..933404e6ba2 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -307,7 +307,7 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) return rs_enr >> /* bit to page */ ((PAGE_SHIFT + 3) - - /* al extent number to bit */ + /* resync extent number to bit */ (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); } From 86db06180a48999b9f1883dd8bf871c882dbf075 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 28 Apr 2011 15:24:18 +0200 Subject: [PATCH 345/609] drbd: Wrong use of RCU in receive_protocol() It is not enough to grab net_conf->integrity_alg under rcu_read_lock() and access it outside of it; the entire net_conf object may be gone by then. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 32 ++++++++++++++++++------------ 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6da7aebde8d..98f03b143b3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2998,7 +2998,6 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; - unsigned char *my_alg; struct net_conf *nc; p_proto = be32_to_cpu(p->protocol); @@ -3009,6 +3008,18 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) cf = be32_to_cpu(p->conn_flags); p_want_lose = cf & CF_WANT_LOSE; + if (tconn->agreed_pro_version >= 87) { + int err; + + if (pi->size > sizeof(p_integrity_alg)) + return -EIO; + err = drbd_recv_all(tconn, p_integrity_alg, pi->size); + if (err) + return err; + + 
p_integrity_alg[SHARED_SECRET_MAX-1] = 0; + } + clear_bit(CONN_DRY_RUN, &tconn->flags); if (cf & CF_DRY_RUN) @@ -3047,23 +3058,18 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect_rcu_unlock; } - my_alg = nc->integrity_alg; - rcu_read_unlock(); - if (tconn->agreed_pro_version >= 87) { - int err; - - err = drbd_recv_all(tconn, p_integrity_alg, pi->size); - if (err) - return err; - - p_integrity_alg[SHARED_SECRET_MAX-1] = 0; - if (strcmp(p_integrity_alg, my_alg)) { + if (strcmp(p_integrity_alg, nc->integrity_alg)) { conn_err(tconn, "incompatible setting of the data-integrity-alg\n"); goto disconnect; } + } + + rcu_read_unlock(); + + if (tconn->agreed_pro_version >= 87) { conn_info(tconn, "data-integrity-alg: %s\n", - my_alg[0] ? my_alg : (unsigned char *)""); + nc->integrity_alg[0] ? nc->integrity_alg : (unsigned char *)""); } return 0; From 8d412fc6d58ae5e2e43f84461dd123bd0168b310 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 27 Apr 2011 20:59:18 +0200 Subject: [PATCH 346/609] drbd: Rename integrity_w_tfm -> integrity_tfm Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 18 +++++++++--------- drivers/block/drbd/drbd_nl.c | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e90bc9f93de..4787c79040c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -849,7 +849,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct list_head out_of_sequence_requests; struct crypto_hash *cram_hmac_tfm; - struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ + struct crypto_hash *integrity_tfm; /* checksums we compute */ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; diff --git 
a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7e56f88c0b6..986470537a6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1689,8 +1689,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) int dgs; int err; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? - crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; sock = &mdev->tconn->data; p = drbd_prepare_command(mdev, sock); @@ -1711,7 +1711,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) } p->dp_flags = cpu_to_be32(dp_flags); if (dgs) - drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, p + 1); + drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1); err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); if (!err) { /* For protocol A, we have to memcpy the payload into @@ -1735,7 +1735,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; - drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest); + drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest); if (memcmp(p + 1, digest, dgs)) { dev_warn(DEV, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", @@ -1762,8 +1762,8 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, int err; int dgs; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? - crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ? 
+ crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; sock = &mdev->tconn->data; p = drbd_prepare_command(mdev, sock); @@ -1773,7 +1773,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, p->block_id = peer_req->block_id; p->seq_num = 0; /* unused */ if (dgs) - drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, p + 1); + drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1); err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size); if (!err) err = _drbd_send_zc_ee(mdev, peer_req); @@ -2406,7 +2406,7 @@ void conn_free_crypto(struct drbd_tconn *tconn) crypto_free_hash(tconn->csums_tfm); crypto_free_hash(tconn->verify_tfm); crypto_free_hash(tconn->cram_hmac_tfm); - crypto_free_hash(tconn->integrity_w_tfm); + crypto_free_hash(tconn->integrity_tfm); crypto_free_hash(tconn->integrity_r_tfm); kfree(tconn->int_dig_in); kfree(tconn->int_dig_vv); @@ -2414,7 +2414,7 @@ void conn_free_crypto(struct drbd_tconn *tconn) tconn->csums_tfm = NULL; tconn->verify_tfm = NULL; tconn->cram_hmac_tfm = NULL; - tconn->integrity_w_tfm = NULL; + tconn->integrity_tfm = NULL; tconn->integrity_r_tfm = NULL; tconn->int_dig_in = NULL; tconn->int_dig_vv = NULL; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ddc9de3dae9..9aa1a7b1b50 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1792,7 +1792,7 @@ struct crypto { struct crypto_hash *verify_tfm; struct crypto_hash *csums_tfm; struct crypto_hash *cram_hmac_tfm; - struct crypto_hash *integrity_w_tfm; + struct crypto_hash *integrity_tfm; struct crypto_hash *integrity_r_tfm; void *int_dig_in; void *int_dig_vv; @@ -1831,7 +1831,7 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) ERR_VERIFY_ALG, ERR_VERIFY_ALG_ND); if (rv != NO_ERROR) return rv; - rv = alloc_tfm(&crypto->integrity_w_tfm, new_conf->integrity_alg, + rv = alloc_tfm(&crypto->integrity_tfm, new_conf->integrity_alg, ERR_INTEGRITY_ALG, 
ERR_INTEGRITY_ALG_ND); if (rv != NO_ERROR) return rv; @@ -1846,8 +1846,8 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) rv = alloc_tfm(&crypto->cram_hmac_tfm, hmac_name, ERR_AUTH_ALG, ERR_AUTH_ALG_ND); } - if (crypto->integrity_w_tfm) { - hash_size = crypto_hash_digestsize(crypto->integrity_w_tfm); + if (crypto->integrity_tfm) { + hash_size = crypto_hash_digestsize(crypto->integrity_tfm); crypto->int_dig_in = kmalloc(hash_size, GFP_KERNEL); if (!crypto->int_dig_in) return ERR_NOMEM; @@ -1864,7 +1864,7 @@ static void free_crypto(struct crypto *crypto) kfree(crypto->int_dig_in); kfree(crypto->int_dig_vv); crypto_free_hash(crypto->cram_hmac_tfm); - crypto_free_hash(crypto->integrity_w_tfm); + crypto_free_hash(crypto->integrity_tfm); crypto_free_hash(crypto->integrity_r_tfm); crypto_free_hash(crypto->csums_tfm); crypto_free_hash(crypto->verify_tfm); @@ -1956,8 +1956,8 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) tconn->int_dig_in = crypto.int_dig_in; kfree(tconn->int_dig_vv); tconn->int_dig_vv = crypto.int_dig_vv; - crypto_free_hash(tconn->integrity_w_tfm); - tconn->integrity_w_tfm = crypto.integrity_w_tfm; + crypto_free_hash(tconn->integrity_tfm); + tconn->integrity_tfm = crypto.integrity_tfm; crypto_free_hash(tconn->integrity_r_tfm); tconn->integrity_r_tfm = crypto.integrity_r_tfm; @@ -2083,7 +2083,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) tconn->int_dig_in = crypto.int_dig_in; tconn->int_dig_vv = crypto.int_dig_vv; tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; - tconn->integrity_w_tfm = crypto.integrity_w_tfm; + tconn->integrity_tfm = crypto.integrity_tfm; tconn->integrity_r_tfm = crypto.integrity_r_tfm; tconn->csums_tfm = crypto.csums_tfm; tconn->verify_tfm = crypto.verify_tfm; From 5b614abe30645c3a528d54ff01da94fde0770c43 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 27 Apr 2011 21:00:12 +0200 Subject: [PATCH 347/609] drbd: Rename integrity_r_tfm -> peer_integrity_tfm 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 8 ++++---- drivers/block/drbd/drbd_nl.c | 12 ++++++------ drivers/block/drbd/drbd_receiver.c | 12 ++++++------ 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4787c79040c..e6e4f4e6410 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -850,7 +850,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute */ - struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + struct crypto_hash *peer_integrity_tfm; /* checksums we verify */ struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; void *int_dig_in; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 986470537a6..747223539fd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1405,8 +1405,8 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_data *dp, int data_size) { - data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; + data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->peer_integrity_tfm) ? 
+ crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm) : 0; _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } @@ -2407,7 +2407,7 @@ void conn_free_crypto(struct drbd_tconn *tconn) crypto_free_hash(tconn->verify_tfm); crypto_free_hash(tconn->cram_hmac_tfm); crypto_free_hash(tconn->integrity_tfm); - crypto_free_hash(tconn->integrity_r_tfm); + crypto_free_hash(tconn->peer_integrity_tfm); kfree(tconn->int_dig_in); kfree(tconn->int_dig_vv); @@ -2415,7 +2415,7 @@ void conn_free_crypto(struct drbd_tconn *tconn) tconn->verify_tfm = NULL; tconn->cram_hmac_tfm = NULL; tconn->integrity_tfm = NULL; - tconn->integrity_r_tfm = NULL; + tconn->peer_integrity_tfm = NULL; tconn->int_dig_in = NULL; tconn->int_dig_vv = NULL; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9aa1a7b1b50..e84b1d755f0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1793,7 +1793,7 @@ struct crypto { struct crypto_hash *csums_tfm; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; - struct crypto_hash *integrity_r_tfm; + struct crypto_hash *peer_integrity_tfm; void *int_dig_in; void *int_dig_vv; }; @@ -1835,7 +1835,7 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) ERR_INTEGRITY_ALG, ERR_INTEGRITY_ALG_ND); if (rv != NO_ERROR) return rv; - rv = alloc_tfm(&crypto->integrity_r_tfm, new_conf->integrity_alg, + rv = alloc_tfm(&crypto->peer_integrity_tfm, new_conf->integrity_alg, ERR_INTEGRITY_ALG, ERR_INTEGRITY_ALG_ND); if (rv != NO_ERROR) return rv; @@ -1865,7 +1865,7 @@ static void free_crypto(struct crypto *crypto) kfree(crypto->int_dig_vv); crypto_free_hash(crypto->cram_hmac_tfm); crypto_free_hash(crypto->integrity_tfm); - crypto_free_hash(crypto->integrity_r_tfm); + crypto_free_hash(crypto->peer_integrity_tfm); crypto_free_hash(crypto->csums_tfm); crypto_free_hash(crypto->verify_tfm); } @@ -1958,8 +1958,8 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) 
tconn->int_dig_vv = crypto.int_dig_vv; crypto_free_hash(tconn->integrity_tfm); tconn->integrity_tfm = crypto.integrity_tfm; - crypto_free_hash(tconn->integrity_r_tfm); - tconn->integrity_r_tfm = crypto.integrity_r_tfm; + crypto_free_hash(tconn->peer_integrity_tfm); + tconn->peer_integrity_tfm = crypto.peer_integrity_tfm; /* FIXME Changing cram_hmac while the connection is established is useless */ crypto_free_hash(tconn->cram_hmac_tfm); @@ -2084,7 +2084,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) tconn->int_dig_vv = crypto.int_dig_vv; tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; tconn->integrity_tfm = crypto.integrity_tfm; - tconn->integrity_r_tfm = crypto.integrity_r_tfm; + tconn->peer_integrity_tfm = crypto.peer_integrity_tfm; tconn->csums_tfm = crypto.csums_tfm; tconn->verify_tfm = crypto.verify_tfm; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 98f03b143b3..36b846bcdda 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1384,8 +1384,8 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, void *dig_vv = mdev->tconn->int_dig_vv; unsigned long *data; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->peer_integrity_tfm) ? 
+ crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm) : 0; if (dgs) { /* @@ -1442,7 +1442,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, } if (dgs) { - drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv); + drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); @@ -1491,8 +1491,8 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, void *dig_in = mdev->tconn->int_dig_in; void *dig_vv = mdev->tconn->int_dig_vv; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->peer_integrity_tfm) ? + crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm) : 0; if (dgs) { err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); @@ -1520,7 +1520,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, } if (dgs) { - drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv); + drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); return -EINVAL; From 4b6ad6d4579908ff82a47ad9ba067a504b1e0882 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Apr 2011 10:20:08 +0200 Subject: [PATCH 348/609] drbd: Remove obsolete drbd_crypto_is_hash() We allocate hash transformations with crypto_alloc_hash() which will only return hash algorithms. It is not necessary to reconfirm that we actually got a hash algorithm. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 25 +++++++++++-------------- drivers/block/drbd/drbd_receiver.c | 5 ----- drivers/block/drbd/drbd_wrappers.h | 6 ------ 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e84b1d755f0..dc5824b175a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1799,7 +1799,7 @@ struct crypto { }; static int -alloc_tfm(struct crypto_hash **tfm, char *tfm_name, int err_alg, int err_nd) +alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg) { if (!tfm_name[0]) return NO_ERROR; @@ -1810,9 +1810,6 @@ alloc_tfm(struct crypto_hash **tfm, char *tfm_name, int err_alg, int err_nd) return err_alg; } - if (!drbd_crypto_is_hash(crypto_hash_tfm(*tfm))) - return err_nd; - return NO_ERROR; } @@ -1823,28 +1820,28 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) enum drbd_ret_code rv; int hash_size; - rv = alloc_tfm(&crypto->csums_tfm, new_conf->csums_alg, - ERR_CSUMS_ALG, ERR_CSUMS_ALG_ND); + rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg, + ERR_CSUMS_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_tfm(&crypto->verify_tfm, new_conf->verify_alg, - ERR_VERIFY_ALG, ERR_VERIFY_ALG_ND); + rv = alloc_hash(&crypto->verify_tfm, new_conf->verify_alg, + ERR_VERIFY_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_tfm(&crypto->integrity_tfm, new_conf->integrity_alg, - ERR_INTEGRITY_ALG, ERR_INTEGRITY_ALG_ND); + rv = alloc_hash(&crypto->integrity_tfm, new_conf->integrity_alg, + ERR_INTEGRITY_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_tfm(&crypto->peer_integrity_tfm, new_conf->integrity_alg, - ERR_INTEGRITY_ALG, ERR_INTEGRITY_ALG_ND); + rv = alloc_hash(&crypto->peer_integrity_tfm, new_conf->integrity_alg, + ERR_INTEGRITY_ALG); if (rv != NO_ERROR) return rv; if (new_conf->cram_hmac_alg[0] != 0) { snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", 
new_conf->cram_hmac_alg); - rv = alloc_tfm(&crypto->cram_hmac_tfm, hmac_name, - ERR_AUTH_ALG, ERR_AUTH_ALG_ND); + rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name, + ERR_AUTH_ALG); } if (crypto->integrity_tfm) { hash_size = crypto_hash_digestsize(crypto->integrity_tfm); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 36b846bcdda..30b655644af 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3100,11 +3100,6 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, alg, name, PTR_ERR(tfm)); return tfm; } - if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) { - crypto_free_hash(tfm); - dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name); - return ERR_PTR(-EINVAL); - } return tfm; } diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index decf9b282e8..46a6d99f7b6 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -45,12 +45,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev, generic_make_request(bio); } -static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) -{ - return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK) - == CRYPTO_ALG_TYPE_HASH; -} - #ifndef __CHECKER__ # undef __cond_lock # define __cond_lock(x,c) (c) From a7eb7bdf58b5dd98560ee6fa5caf2fcdd1779a47 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Apr 2011 13:19:58 +0200 Subject: [PATCH 349/609] drbd: Introduce a "lockless" variant of drbd_send_protocol() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 44 +++++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e6e4f4e6410..44f77265d2b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1051,6
+1051,7 @@ extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t, unsigned); +extern int __drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 747223539fd..3cb2af6c606 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -757,14 +757,24 @@ static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, return prepare_header80(buffer, cmd, size); } +static void *__conn_prepare_command(struct drbd_tconn *tconn, + struct drbd_socket *sock) +{ + if (!sock->socket) + return NULL; + return sock->sbuf + drbd_header_size(tconn); +} + void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock) { + void *p; + mutex_lock(&sock->mutex); - if (!sock->socket) { + p = __conn_prepare_command(tconn, sock); + if (!p) mutex_unlock(&sock->mutex); - return NULL; - } - return sock->sbuf + drbd_header_size(tconn); + + return p; } void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock) @@ -798,13 +808,20 @@ static int __send_command(struct drbd_tconn *tconn, int vnr, return err; } +static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock, + enum drbd_packet cmd, unsigned int header_size, + void *data, unsigned int size) +{ + return __send_command(tconn, 0, sock, cmd, header_size, data, size); +} + int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock, enum drbd_packet cmd, unsigned int header_size, void *data, unsigned int size) { int err; - err = __send_command(tconn, 0, sock, cmd, header_size, data, size); + err = __conn_send_command(tconn, sock, cmd, header_size, data, size); mutex_unlock(&sock->mutex); return err; } @@ 
-893,7 +910,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev) return drbd_send_command(mdev, sock, cmd, size, NULL, 0); } -int drbd_send_protocol(struct drbd_tconn *tconn) +int __drbd_send_protocol(struct drbd_tconn *tconn) { struct drbd_socket *sock; struct p_protocol *p; @@ -901,7 +918,7 @@ int drbd_send_protocol(struct drbd_tconn *tconn) int size, cf; sock = &tconn->data; - p = conn_prepare_command(tconn, sock); + p = __conn_prepare_command(tconn, sock); if (!p) return -EIO; @@ -935,7 +952,18 @@ int drbd_send_protocol(struct drbd_tconn *tconn) strcpy(p->integrity_alg, nc->integrity_alg); rcu_read_unlock(); - return conn_send_command(tconn, sock, P_PROTOCOL, size, NULL, 0); + return __conn_send_command(tconn, sock, P_PROTOCOL, size, NULL, 0); +} + +int drbd_send_protocol(struct drbd_tconn *tconn) +{ + int err; + + mutex_lock(&tconn->data.mutex); + err = __drbd_send_protocol(tconn); + mutex_unlock(&tconn->data.mutex); + + return err; } int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) From 88104ca458dff8ed7db935936b91b9af4422c9cd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 28 Apr 2011 21:47:21 +0200 Subject: [PATCH 350/609] drbd: Allow to change data-integrity-alg on the fly The main purpose of this is to allow to turn data integrity checking on and off on demand without causing interruptions. Implemented by allocating tconn->peer_integrity_tfm only when receiving a P_PROTOCOL message. All accesses to tconn->peer_integrity_tfm happen in worker context, and no further synchronization is necessary. On the sender side, tconn->integrity_tfm is modified under tconn->data.mutex, and a P_PROTOCOL message is sent whenever the algorithm changes. All accesses to tconn->integrity_tfm already happen under this mutex.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 +-- drivers/block/drbd/drbd_nl.c | 22 ++++++------ drivers/block/drbd/drbd_receiver.c | 54 ++++++++++++++---------------- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 3cb2af6c606..a5c9b385223 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1433,8 +1433,8 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_data *dp, int data_size) { - data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->peer_integrity_tfm) ? - crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm) : 0; + if (mdev->tconn->peer_integrity_tfm) + data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index dc5824b175a..17c0cda7bbe 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1793,7 +1793,6 @@ struct crypto { struct crypto_hash *csums_tfm; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; - struct crypto_hash *peer_integrity_tfm; void *int_dig_in; void *int_dig_vv; }; @@ -1832,10 +1831,6 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) ERR_INTEGRITY_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_hash(&crypto->peer_integrity_tfm, new_conf->integrity_alg, - ERR_INTEGRITY_ALG); - if (rv != NO_ERROR) - return rv; if (new_conf->cram_hmac_alg[0] != 0) { snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", new_conf->cram_hmac_alg); @@ -1862,7 +1857,6 @@ static void free_crypto(struct crypto *crypto) kfree(crypto->int_dig_vv); crypto_free_hash(crypto->cram_hmac_tfm); crypto_free_hash(crypto->integrity_tfm); - 
crypto_free_hash(crypto->peer_integrity_tfm); crypto_free_hash(crypto->csums_tfm); crypto_free_hash(crypto->verify_tfm); } @@ -1876,6 +1870,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int ovr; /* online verify running */ int rsr; /* re-sync running */ struct crypto crypto = { }; + bool change_integrity_alg; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); if (!adm_ctx.reply_skb) @@ -1893,6 +1888,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) conn_reconfig_start(tconn); + mutex_lock(&tconn->data.mutex); mutex_lock(&tconn->net_conf_update); old_conf = tconn->net_conf; @@ -1931,6 +1927,9 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) goto fail; } + change_integrity_alg = strcmp(old_conf->integrity_alg, + new_conf->integrity_alg); + retcode = alloc_crypto(&crypto, new_conf); if (retcode != NO_ERROR) goto fail; @@ -1948,21 +1947,24 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) crypto.verify_tfm = NULL; } - /* FIXME can not assign these so bluntly while we have ongoing IO */ kfree(tconn->int_dig_in); tconn->int_dig_in = crypto.int_dig_in; kfree(tconn->int_dig_vv); tconn->int_dig_vv = crypto.int_dig_vv; crypto_free_hash(tconn->integrity_tfm); tconn->integrity_tfm = crypto.integrity_tfm; - crypto_free_hash(tconn->peer_integrity_tfm); - tconn->peer_integrity_tfm = crypto.peer_integrity_tfm; + if (change_integrity_alg) { + /* Do this without trying to take tconn->data.mutex again. 
*/ + if (__drbd_send_protocol(tconn)) + goto fail; + } /* FIXME Changing cram_hmac while the connection is established is useless */ crypto_free_hash(tconn->cram_hmac_tfm); tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; mutex_unlock(&tconn->net_conf_update); + mutex_unlock(&tconn->data.mutex); synchronize_rcu(); kfree(old_conf); @@ -1973,6 +1975,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) fail: mutex_unlock(&tconn->net_conf_update); + mutex_unlock(&tconn->data.mutex); free_crypto(&crypto); kfree(new_conf); done: @@ -2081,7 +2084,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) tconn->int_dig_vv = crypto.int_dig_vv; tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; tconn->integrity_tfm = crypto.integrity_tfm; - tconn->peer_integrity_tfm = crypto.peer_integrity_tfm; tconn->csums_tfm = crypto.csums_tfm; tconn->verify_tfm = crypto.verify_tfm; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 30b655644af..9c8bcce0e68 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1384,10 +1384,9 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, void *dig_vv = mdev->tconn->int_dig_vv; unsigned long *data; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->peer_integrity_tfm) ? - crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm) : 0; - - if (dgs) { + dgs = 0; + if (mdev->tconn->peer_integrity_tfm) { + dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); /* * FIXME: Receive the incoming digest into the receive buffer * here, together with its struct p_data? 
@@ -1395,10 +1394,9 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); if (err) return NULL; + data_size -= dgs; } - data_size -= dgs; - if (!expect(data_size != 0)) return NULL; if (!expect(IS_ALIGNED(data_size, 512))) @@ -1491,17 +1489,15 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, void *dig_in = mdev->tconn->int_dig_in; void *dig_vv = mdev->tconn->int_dig_vv; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->peer_integrity_tfm) ? - crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm) : 0; - - if (dgs) { + dgs = 0; + if (mdev->tconn->peer_integrity_tfm) { + dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); if (err) return err; + data_size -= dgs; } - data_size -= dgs; - /* optimistically update recv_cnt. if receiving fails below, * we disconnect anyways, and counters will be reset. */ mdev->recv_cnt += data_size>>9; @@ -2997,7 +2993,6 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) struct p_protocol *p = pi->data; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; - char p_integrity_alg[SHARED_SECRET_MAX] = ""; struct net_conf *nc; p_proto = be32_to_cpu(p->protocol); @@ -3009,15 +3004,30 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) p_want_lose = cf & CF_WANT_LOSE; if (tconn->agreed_pro_version >= 87) { + char integrity_alg[SHARED_SECRET_MAX]; + struct crypto_hash *tfm = NULL; int err; - if (pi->size > sizeof(p_integrity_alg)) + if (pi->size > sizeof(integrity_alg)) return -EIO; - err = drbd_recv_all(tconn, p_integrity_alg, pi->size); + err = drbd_recv_all(tconn, integrity_alg, pi->size); if (err) return err; + integrity_alg[SHARED_SECRET_MAX-1] = 0; - p_integrity_alg[SHARED_SECRET_MAX-1] = 0; + if (integrity_alg[0]) { + tfm = crypto_alloc_hash(integrity_alg, 0, 
CRYPTO_ALG_ASYNC); + if (!tfm) { + conn_err(tconn, "peer data-integrity-alg %s not supported\n", + integrity_alg); + goto disconnect; + } + conn_info(tconn, "peer data-integrity-alg: %s\n", integrity_alg); + } + + if (tconn->peer_integrity_tfm) + crypto_free_hash(tconn->peer_integrity_tfm); + tconn->peer_integrity_tfm = tfm; } clear_bit(CONN_DRY_RUN, &tconn->flags); @@ -3058,20 +3068,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect_rcu_unlock; } - if (tconn->agreed_pro_version >= 87) { - if (strcmp(p_integrity_alg, nc->integrity_alg)) { - conn_err(tconn, "incompatible setting of the data-integrity-alg\n"); - goto disconnect; - } - } - rcu_read_unlock(); - if (tconn->agreed_pro_version >= 87) { - conn_info(tconn, "data-integrity-alg: %s\n", - nc->integrity_alg[0] ? nc->integrity_alg : (unsigned char *)""); - } - return 0; disconnect_rcu_unlock: From f3dfa40a67c354a5886c5ae53a9c5d3a2c6fd06e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 2 May 2011 10:45:05 +0200 Subject: [PATCH 351/609] drbd: fix race when forcefully disconnecting If a forced disconnect hits a restarting receiver right after it passed its final "if (C_DISCONNECTING)" test in drbdd_init(), but before it was actually restarted by drbd_thread_setup, we could be left with a connection stuck in C_DISCONNECTING, never reaching C_STANDALONE, which would be necessary to take it down or reconfigure it. Move the last cleanup into w_after_conn_state_ch(), and do an additional state change request in conn_try_disconnect(), just in case. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 85 +++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 14 +---- drivers/block/drbd/drbd_state.c | 13 +++++ 3 files changed, 62 insertions(+), 50 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 17c0cda7bbe..9d9b93f0885 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2115,37 +2115,54 @@ out: static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force) { enum drbd_state_rv rv; - if (force) { - spin_lock_irq(&tconn->req_lock); - rv = _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - spin_unlock_irq(&tconn->req_lock); - return rv; - } - rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), + force ? CS_HARD : 0); switch (rv) { case SS_NOTHING_TO_DO: + break; case SS_ALREADY_STANDALONE: return SS_SUCCESS; case SS_PRIMARY_NOP: /* Our state checking code wants to see the peer outdated. */ rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED), CS_VERBOSE); + pdsk, D_OUTDATED), CS_VERBOSE); break; case SS_CW_FAILED_BY_PEER: /* The peer probably wants to see us outdated. */ rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, disk, D_OUTDATED), 0); if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - rv = SS_SUCCESS; + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), + CS_HARD); } break; default:; /* no special handling necessary */ } + if (rv >= SS_SUCCESS) { + enum drbd_state_rv rv2; + /* No one else can reconfigure the network while I am here. + * The state handling only uses drbd_thread_stop_nowait(), + * we want to really wait here until the receiver is no more. + */ + drbd_thread_stop(&adm_ctx.tconn->receiver); + + /* Race breaker. 
This additional state change request may be + * necessary, if this was a forced disconnect during a receiver + * restart. We may have "killed" the receiver thread just + * after drbdd_init() returned. Typically, we should be + * C_STANDALONE already, now, and this becomes a no-op. + */ + rv2 = conn_request_state(tconn, NS(conn, C_STANDALONE), + CS_VERBOSE | CS_HARD); + if (rv2 < SS_SUCCESS) + conn_err(tconn, + "unexpected rv2=%d in conn_try_disconnect()\n", + rv2); + } return rv; } @@ -2176,19 +2193,9 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) rv = conn_try_disconnect(tconn, parms.force_disconnect); if (rv < SS_SUCCESS) - goto fail; - - /* No one else can reconfigure the network while I am here. - * The state handling only uses drbd_thread_stop_nowait(), - * we want to really wait here until the receiver is no more. */ - drbd_thread_stop(&tconn->receiver); - if (wait_event_interruptible(tconn->ping_wait, - tconn->cstate == C_STANDALONE)) { - retcode = ERR_INTR; - goto fail; - } - - retcode = NO_ERROR; + retcode = rv; /* FIXME: Type mismatch. */ + else + retcode = NO_ERROR; fail: drbd_adm_finish(info, retcode); return 0; @@ -3049,8 +3056,7 @@ out: int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) { - enum drbd_ret_code retcode; - enum drbd_state_rv rv; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ struct drbd_conf *mdev; unsigned i; @@ -3074,30 +3080,35 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } } + up_read(&drbd_cfg_rwsem); - /* disconnect */ - rv = conn_try_disconnect(adm_ctx.tconn, 0); - if (rv < SS_SUCCESS) { - retcode = rv; /* enum type mismatch! 
*/ + /* disconnect; may stop the receiver; + * must not hold the drbd_cfg_rwsem */ + retcode = conn_try_disconnect(adm_ctx.tconn, 0); + if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to disconnect"); - goto out_unlock; + goto out; } - /* Make sure the network threads have actually stopped, - * state handling only does drbd_thread_stop_nowait(). */ - drbd_thread_stop(&adm_ctx.tconn->receiver); - + down_read(&drbd_cfg_rwsem); /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - rv = adm_detach(mdev); - if (rv < SS_SUCCESS) { - retcode = rv; /* enum type mismatch! */ + retcode = adm_detach(mdev); + if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to detach"); goto out_unlock; } } up_read(&drbd_cfg_rwsem); + /* If we reach this, all volumes (of this tconn) are Secondary, + * Disconnected, Diskless, aka Unconfigured. Make sure all threads have + * actually stopped, state handling only does drbd_thread_stop_nowait(). + * This needs to be done without holding drbd_cfg_rwsem. 
*/ + drbd_thread_stop(&adm_ctx.tconn->worker); + + /* Now, nothing can fail anymore */ + /* delete volumes */ down_write(&drbd_cfg_rwsem); idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9c8bcce0e68..956cdda9343 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4213,20 +4213,8 @@ static void drbd_disconnect(struct drbd_tconn *tconn) spin_unlock_irq(&tconn->req_lock); - if (oc == C_DISCONNECTING) { - struct net_conf *old_conf; - - mutex_lock(&tconn->net_conf_update); - old_conf = tconn->net_conf; - rcu_assign_pointer(tconn->net_conf, NULL); - conn_free_crypto(tconn); - mutex_unlock(&tconn->net_conf_update); - - synchronize_rcu(); - kfree(old_conf); - + if (oc == C_DISCONNECTING) conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); - } } static int drbd_disconnected(int vnr, void *p, void *data) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8b0f31b6808..0512bbb952e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1416,6 +1416,19 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED) drbd_thread_start(&tconn->receiver); + if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) { + struct net_conf *old_conf; + + mutex_lock(&tconn->net_conf_update); + old_conf = tconn->net_conf; + rcu_assign_pointer(tconn->net_conf, NULL); + conn_free_crypto(tconn); + mutex_unlock(&tconn->net_conf_update); + + synchronize_rcu(); + kfree(old_conf); + } + if (ns_max.susp_fen) { /* case1: The outdate peer handler is successful: */ if (ns_max.pdsk <= D_OUTDATED) { From 992d6e91d3654c11c2e4d8d5933ffbf82a0440f0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 2 May 2011 11:47:18 +0200 Subject: [PATCH 352/609] drbd: fix thread stop deadlock There are races where the receiver may be 
exiting, but still need the worker to process some stuff. Do not wait for the receiver to die from an exiting worker. The receiver must already be dead in case the worker decides to exit. If the receiver was still alive, it may still want to queue work, and do drbd_flush_workqueue() from its disconnect cleanup code, which would no longer be processed by an exiting worker. This also would deadlock, if the worker was to synchronously wait for the receiver to die. Do not implicitly stop the worker. The worker will only be stopped from configuration context, from conn_reconfig_done(), drbd_adm_down() or drbd_adm_delete_connection(), after making sure the receiver is already stopped. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 14 ++++++++++---- drivers/block/drbd/drbd_state.c | 14 -------------- drivers/block/drbd/drbd_worker.c | 4 ---- 4 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a5c9b385223..427e959e486 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -503,7 +503,7 @@ restart: thi->task = NULL; thi->t_state = NONE; smp_mb(); - complete(&thi->stop); + complete_all(&thi->stop); spin_unlock_irqrestore(&thi->t_lock, flags); conn_info(tconn, "Terminating %s\n", current->comm); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9d9b93f0885..25468e2be8d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1050,10 +1050,16 @@ static void conn_reconfig_start(struct drbd_tconn *tconn) /* if still unconfigured, stops worker again. 
*/ static void conn_reconfig_done(struct drbd_tconn *tconn) { + bool stop_threads; spin_lock_irq(&tconn->req_lock); - if (conn_all_vols_unconf(tconn)) - drbd_thread_stop_nowait(&tconn->worker); + stop_threads = conn_all_vols_unconf(tconn); spin_unlock_irq(&tconn->req_lock); + if (stop_threads) { + /* asender is implicitly stopped by receiver + * in drbd_disconnect() */ + drbd_thread_stop(&tconn->receiver); + drbd_thread_stop(&tconn->worker); + } } /* Make sure IO is suspended before calling this function(). */ @@ -3123,7 +3129,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* delete connection */ if (conn_lowest_minor(adm_ctx.tconn) < 0) { - drbd_thread_stop(&adm_ctx.tconn->worker); list_del(&adm_ctx.tconn->all_tconn); kref_put(&adm_ctx.tconn->kref, &conn_destroy); @@ -3133,7 +3138,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) retcode = ERR_CONN_IN_USE; drbd_msg_put_info("failed to delete connection"); } - up_write(&drbd_cfg_rwsem); goto out; out_unlock: @@ -3164,6 +3168,8 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) } up_write(&drbd_cfg_rwsem); + if (retcode == NO_ERROR) + drbd_thread_stop(&adm_ctx.tconn->worker); out: drbd_adm_finish(info, retcode); return 0; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0512bbb952e..523ec094067 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -40,7 +40,6 @@ struct after_state_chg_work { static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static void after_all_state_ch(struct drbd_tconn *tconn); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); 
@@ -1380,8 +1379,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); } - after_all_state_ch(mdev->tconn); - drbd_md_sync(mdev); } @@ -1393,12 +1390,6 @@ struct after_conn_state_chg_work { enum chg_state_flags flags; }; -static void after_all_state_ch(struct drbd_tconn *tconn) -{ - if (conn_all_vols_unconf(tconn)) - drbd_thread_stop_nowait(&tconn->worker); -} - static int w_after_conn_state_ch(struct drbd_work *w, int unused) { struct after_conn_state_chg_work *acscw = @@ -1461,12 +1452,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) spin_unlock_irq(&tconn->req_lock); } } - - - //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); - after_all_state_ch(tconn); kref_put(&tconn->kref, &conn_destroy); - return 0; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 7a73bd4287c..0da1547bb2d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1744,10 +1744,6 @@ int drbd_worker(struct drbd_thread *thi) */ spin_unlock_irq(&tconn->data.work.q_lock); - /* _drbd_set_state only uses stop_nowait. - * wait here for the exiting receiver. */ - drbd_thread_stop(&tconn->receiver); - down_read(&drbd_cfg_rwsem); idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); From 009ba89db5ae836949009f97a00abb96feba69f4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 2 May 2011 11:51:31 +0200 Subject: [PATCH 353/609] drbd: fix schedule in atomic An administrative detach used to request a state change directly to D_DISKLESS, first suspending IO to avoid the last put_ldev() occurring from an endio handler, potentially in irq context. This is not enough on the receiving side (typically secondary): we may miss some peer_req on the way to local disk, which then may do the last put_ldev() from their drbd_peer_request_endio(). 
This patch makes the detach always go through the intermediate D_FAILED state. We may consider renaming it D_DETACHING. An alternative approach would be to create yet another work item to be scheduled on the worker, do the destructor work from there, and get the timing right. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 25468e2be8d..7320ac00f0f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1670,12 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) static int adm_detach(struct drbd_conf *mdev) { enum drbd_state_rv retcode; + int ret; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - wait_event(mdev->misc_wait, - mdev->state.disk != D_DISKLESS || - !atomic_read(&mdev->local_cnt)); + retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); + /* D_FAILED will transition to DISKLESS. 
*/ + ret = wait_event_interruptible(mdev->misc_wait, + mdev->state.disk != D_FAILED); drbd_resume_io(mdev); + if ((int)retcode == (int)SS_IS_DISKLESS) + retcode = SS_NOTHING_TO_DO; + if (ret) + retcode = ERR_INTR; return retcode; } From d8cd289dbe69ce9b8115d6f200ceff657e5dafa0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 May 2011 12:27:11 +0200 Subject: [PATCH 354/609] drbd: Remove left-over unused define Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7f5149bef70..bcebb016fda 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,5 +170,4 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C -#undef RANGE #endif From b966b5dd8e17e6c105ca55533fd412de5d5b429e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 May 2011 14:56:09 +0200 Subject: [PATCH 355/609] drbd: Generate the drbd_set_*_defaults() functions from drbd_genl.h Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 86 ++++-------------------------- include/linux/drbd_genl.h | 87 +++++++++++++++---------------- include/linux/genl_magic_func.h | 26 +++++++++ include/linux/genl_magic_struct.h | 8 +++ 6 files changed, 89 insertions(+), 122 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 44f77265d2b..8655fcb8202 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1390,7 +1390,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ -extern void drbd_set_res_opts_default(struct res_opts *r); +extern void drbd_set_res_opts_defaults(struct res_opts *r); extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct 
drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 427e959e486..4ae3e7a99d7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2488,7 +2488,7 @@ struct drbd_tconn *conn_create(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); - drbd_set_res_opts_default(&tconn->res_opts); + drbd_set_res_opts_defaults(&tconn->res_opts); down_write(&drbd_cfg_rwsem); kref_init(&tconn->kref); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7320ac00f0f..f5732cf46c2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1090,77 +1090,6 @@ static bool should_set_defaults(struct genl_info *info) return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); } -/* Maybe we should we generate these functions - * from the drbd_genl.h magic as well? - * That way we would not "accidentally forget" to add defaults here. 
*/ - -#define RESET_ARRAY_FIELD(field) do { \ - memset(field, 0, sizeof(field)); \ - field ## _len = 0; \ -} while (0) -void drbd_set_res_opts_default(struct res_opts *r) -{ - RESET_ARRAY_FIELD(r->cpu_mask); - r->on_no_data = DRBD_ON_NO_DATA_DEF; -} - -static void drbd_set_net_conf_defaults(struct net_conf *nc) -{ - /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT - * in drbd_genl.h, they can only be change with disconnect/reconnect */ - RESET_ARRAY_FIELD(nc->shared_secret); - - RESET_ARRAY_FIELD(nc->cram_hmac_alg); - RESET_ARRAY_FIELD(nc->integrity_alg); - RESET_ARRAY_FIELD(nc->verify_alg); - RESET_ARRAY_FIELD(nc->csums_alg); -#undef RESET_ARRAY_FIELD - - nc->wire_protocol = DRBD_PROTOCOL_DEF; - nc->try_connect_int = DRBD_CONNECT_INT_DEF; - nc->timeout = DRBD_TIMEOUT_DEF; - nc->ping_int = DRBD_PING_INT_DEF; - nc->ping_timeo = DRBD_PING_TIMEO_DEF; - nc->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - nc->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - nc->ko_count = DRBD_KO_COUNT_DEF; - nc->max_buffers = DRBD_MAX_BUFFERS_DEF; - nc->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - nc->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - nc->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - nc->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - nc->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - nc->rr_conflict = DRBD_RR_CONFLICT_DEF; - nc->on_congestion = DRBD_ON_CONGESTION_DEF; - nc->cong_fill = DRBD_CONG_FILL_DEF; - nc->cong_extents = DRBD_CONG_EXTENTS_DEF; - nc->two_primaries = 0; - nc->no_cork = 0; - nc->always_asbp = 0; - nc->use_rle = 0; -} - -static void drbd_set_disk_conf_defaults(struct disk_conf *dc) -{ - /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT - * in drbd_genl.h, they can only be change with detach/reattach */ - dc->on_io_error = DRBD_ON_IO_ERROR_DEF; - dc->fencing = DRBD_FENCING_DEF; - dc->resync_rate = DRBD_RATE_DEF; - dc->resync_after = DRBD_AFTER_DEF; - dc->al_extents = DRBD_AL_EXTENTS_DEF; - dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; - dc->c_delay_target = DRBD_C_DELAY_TARGET_DEF; - 
dc->c_fill_target = DRBD_C_FILL_TARGET_DEF; - dc->c_max_rate = DRBD_C_MAX_RATE_DEF; - dc->c_min_rate = DRBD_C_MIN_RATE_DEF; - dc->no_disk_barrier = 0; - dc->no_disk_flush = 0; - dc->no_disk_drain = 0; - dc->no_md_flush = 0; -} - - int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -1198,7 +1127,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf)); if (should_set_defaults(info)) - drbd_set_disk_conf_defaults(new_disk_conf); + set_disk_conf_defaults(new_disk_conf); err = disk_conf_from_attrs_for_change(new_disk_conf, info); if (err) { @@ -1315,7 +1244,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - drbd_set_disk_conf_defaults(&nbc->dc); + set_disk_conf_defaults(&nbc->dc); err = disk_conf_from_attrs(&nbc->dc, info); if (err) { @@ -1911,7 +1840,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) *new_conf = *old_conf; if (should_set_defaults(info)) - drbd_set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_conf); err = net_conf_from_attrs_for_change(new_conf, info); if (err) { @@ -2029,7 +1958,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } - drbd_set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_conf); err = net_conf_from_attrs(new_conf, info); if (err) { @@ -2301,6 +2230,11 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) return 0; } +void drbd_set_res_opts_defaults(struct res_opts *r) +{ + return set_res_opts_defaults(r); +} + int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -2325,7 +2259,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) res_opts = tconn->res_opts; if (should_set_defaults(info)) - drbd_set_res_opts_default(&res_opts); + set_res_opts_defaults(&res_opts); err = res_opts_from_attrs(&res_opts, info); if (err) { 
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 10144d546a6..549800668cb 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -110,63 +110,62 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, * but it won't propagate through the stack */ __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) - __u32_field(6, GENLA_F_MANDATORY, on_io_error) - __u32_field(7, GENLA_F_MANDATORY, fencing) + __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - __u32_field(8, GENLA_F_MANDATORY, resync_rate) - __u32_field(9, GENLA_F_MANDATORY, resync_after) - __u32_field(10, GENLA_F_MANDATORY, al_extents) - __u32_field(11, GENLA_F_MANDATORY, c_plan_ahead) - __u32_field(12, GENLA_F_MANDATORY, c_delay_target) - __u32_field(13, GENLA_F_MANDATORY, c_fill_target) - __u32_field(14, GENLA_F_MANDATORY, c_max_rate) - __u32_field(15, GENLA_F_MANDATORY, c_min_rate) - - __flg_field(16, GENLA_F_MANDATORY, no_disk_barrier) - __flg_field(17, GENLA_F_MANDATORY, no_disk_flush) - __flg_field(18, GENLA_F_MANDATORY, no_disk_drain) - __flg_field(19, GENLA_F_MANDATORY, no_md_flush) + __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RATE_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) + __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + __flg_field_def(16, GENLA_F_MANDATORY, no_disk_barrier, 0) + __flg_field_def(17, GENLA_F_MANDATORY, no_disk_flush, 0) + __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) + 
__flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, - __str_field(1, GENLA_F_MANDATORY, cpu_mask, 32) - __u32_field(2, GENLA_F_MANDATORY, on_no_data) + __str_field_def(1, GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) - __str_field(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __str_field_def(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field(8, GENLA_F_MANDATORY, wire_protocol) - __u32_field(9, GENLA_F_MANDATORY, try_connect_int) - __u32_field(10, GENLA_F_MANDATORY, timeout) - __u32_field(11, GENLA_F_MANDATORY, ping_int) - __u32_field(12, GENLA_F_MANDATORY, ping_timeo) - __u32_field(13, GENLA_F_MANDATORY, sndbuf_size) - __u32_field(14, GENLA_F_MANDATORY, rcvbuf_size) - __u32_field(15, GENLA_F_MANDATORY, ko_count) - __u32_field(16, GENLA_F_MANDATORY, max_buffers) - __u32_field(17, GENLA_F_MANDATORY, max_epoch_size) - __u32_field(18, GENLA_F_MANDATORY, unplug_watermark) - __u32_field(19, GENLA_F_MANDATORY, after_sb_0p) - __u32_field(20, GENLA_F_MANDATORY, after_sb_1p) - __u32_field(21, GENLA_F_MANDATORY, after_sb_2p) - __u32_field(22, GENLA_F_MANDATORY, rr_conflict) - __u32_field(23, GENLA_F_MANDATORY, on_congestion) - __u32_field(24, GENLA_F_MANDATORY, cong_fill) - __u32_field(25, GENLA_F_MANDATORY, cong_extents) - __flg_field(26, GENLA_F_MANDATORY, two_primaries) + __str_field_def(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(5, 
GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, try_connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(13, GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(14, GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(15, GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(16, GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(17, GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(18, GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(19, GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(20, GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(21, GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(22, GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field(28, GENLA_F_MANDATORY, no_cork) - __flg_field(29, GENLA_F_MANDATORY, always_asbp) + __flg_field_def(28, GENLA_F_MANDATORY, no_cork, 0) + __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field(31, GENLA_F_MANDATORY, use_rle) 
+ __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index e458282a372..e908f1c5035 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -427,6 +427,32 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #include GENL_MAGIC_INCLUDE_FILE + +/* Functions for initializing structs to default values. */ + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) +#undef __u32_field_def +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __flg_field_def +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __str_field_def +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + memset(x->name, 0, sizeof(x->name)); \ + x->name ## _len = 0; +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \ +static void set_ ## s_name ## _defaults(struct s_name *x) { \ +s_fields \ +} + +#include GENL_MAGIC_INCLUDE_FILE + #endif /* __KERNEL__ */ /* }}}1 */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 9a605b9ee83..f2c7cc7831d 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -107,6 +107,14 @@ enum { __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ nla_memcpy, NLA_PUT) +/* fields with default values */ +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + __flg_field(attr_nr, attr_flag, name) +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + __u32_field(attr_nr, attr_flag, name) +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + __str_field(attr_nr, 
attr_flag, name, maxlen) + #define __nla_put_flag(skb, attrtype, value) \ do { \ if (value) \ From 934e6138b56203231d757d7b5ca3252edffc210e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 2 May 2011 11:24:04 +0200 Subject: [PATCH 356/609] drbd: Removed dead code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f5732cf46c2..8d2c3397cea 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2240,7 +2240,6 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; cpumask_var_t new_cpu_mask; struct drbd_tconn *tconn; - int *rs_plan_s = NULL; struct res_opts res_opts; int err; @@ -2291,7 +2290,6 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) } fail: - kfree(rs_plan_s); free_cpumask_var(new_cpu_mask); drbd_adm_finish(info, retcode); From a0095508cac5c413c7119076b26f536486314ef1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 13:14:15 +0200 Subject: [PATCH 357/609] drbd: Renamed the net_conf_update mutex to conf_update Preparing to use the same mutex for disk_conf updates Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 16 ++++++++-------- drivers/block/drbd/drbd_receiver.c | 10 +++++----- drivers/block/drbd/drbd_state.c | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8655fcb8202..0311dfde776 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -832,7 +832,7 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned long flags; struct net_conf *net_conf; /* content protected by rcu */ - struct mutex net_conf_update; /* mutex for 
ready-copy-update of net_conf */ + struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */ wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ struct res_opts res_opts; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4ae3e7a99d7..e37244485d7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2474,7 +2474,7 @@ struct drbd_tconn *conn_create(const char *name) tconn->cstate = C_STANDALONE; mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); - mutex_init(&tconn->net_conf_update); + mutex_init(&tconn->conf_update); init_waitqueue_head(&tconn->ping_wait); idr_init(&tconn->volumes); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 8d2c3397cea..3bad1412be6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -597,11 +597,11 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) put_ldev(mdev); } } else { - mutex_lock(&mdev->tconn->net_conf_update); + mutex_lock(&mdev->tconn->conf_update); nc = mdev->tconn->net_conf; if (nc) nc->want_lose = 0; /* without copy; single bit op is atomic */ - mutex_unlock(&mdev->tconn->net_conf_update); + mutex_unlock(&mdev->tconn->conf_update); set_disk_ro(mdev->vdisk, false); if (get_ldev(mdev)) { @@ -1829,7 +1829,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) conn_reconfig_start(tconn); mutex_lock(&tconn->data.mutex); - mutex_lock(&tconn->net_conf_update); + mutex_lock(&tconn->conf_update); old_conf = tconn->net_conf; if (!old_conf) { @@ -1903,7 +1903,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) crypto_free_hash(tconn->cram_hmac_tfm); tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; - mutex_unlock(&tconn->net_conf_update); + mutex_unlock(&tconn->conf_update); mutex_unlock(&tconn->data.mutex); synchronize_rcu(); kfree(old_conf); @@ -1914,7 +1914,7 @@ int 
drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) goto done; fail: - mutex_unlock(&tconn->net_conf_update); + mutex_unlock(&tconn->conf_update); mutex_unlock(&tconn->data.mutex); free_crypto(&crypto); kfree(new_conf); @@ -2010,11 +2010,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) conn_flush_workqueue(tconn); - mutex_lock(&tconn->net_conf_update); + mutex_lock(&tconn->conf_update); old_conf = tconn->net_conf; if (old_conf) { retcode = ERR_NET_CONFIGURED; - mutex_unlock(&tconn->net_conf_update); + mutex_unlock(&tconn->conf_update); goto fail; } rcu_assign_pointer(tconn->net_conf, new_conf); @@ -2027,7 +2027,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) tconn->csums_tfm = crypto.csums_tfm; tconn->verify_tfm = crypto.verify_tfm; - mutex_unlock(&tconn->net_conf_update); + mutex_unlock(&tconn->conf_update); rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, i) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 956cdda9343..567d64b9906 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3221,7 +3221,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - mutex_lock(&mdev->tconn->net_conf_update); + mutex_lock(&mdev->tconn->conf_update); old_conf = mdev->tconn->net_conf; if (strcmp(old_conf->verify_alg, p->verify_alg)) { @@ -3296,7 +3296,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } rcu_assign_pointer(tconn->net_conf, new_conf); } - mutex_unlock(&mdev->tconn->net_conf_update); + mutex_unlock(&mdev->tconn->conf_update); if (new_conf) { synchronize_rcu(); kfree(old_conf); @@ -3314,7 +3314,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) return 0; disconnect: - mutex_unlock(&mdev->tconn->net_conf_update); + mutex_unlock(&mdev->tconn->conf_update); /* just for completeness: 
actually not needed, * as this is not reached if csums_tfm was ok. */ crypto_free_hash(csums_tfm); @@ -3744,9 +3744,9 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } } - mutex_lock(&mdev->tconn->net_conf_update); + mutex_lock(&mdev->tconn->conf_update); mdev->tconn->net_conf->want_lose = 0; /* without copy; single bit op is atomic */ - mutex_unlock(&mdev->tconn->net_conf_update); + mutex_unlock(&mdev->tconn->conf_update); drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 523ec094067..332781cfb55 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1410,11 +1410,11 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) { struct net_conf *old_conf; - mutex_lock(&tconn->net_conf_update); + mutex_lock(&tconn->conf_update); old_conf = tconn->net_conf; rcu_assign_pointer(tconn->net_conf, NULL); conn_free_crypto(tconn); - mutex_unlock(&tconn->net_conf_update); + mutex_unlock(&tconn->conf_update); synchronize_rcu(); kfree(old_conf); From ef5e44a672e444b6957fdccda3f40a5cbe202335 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 13:27:43 +0200 Subject: [PATCH 358/609] drbd: drbd_new_dev_size() gets the user-requested disk_size as argument Preparing RCU for disk_conf Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 11 ++++++----- drivers/block/drbd/drbd_receiver.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0311dfde776..8c7e3409c43 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1395,7 +1395,7 @@ extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void 
drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); -extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); +extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3bad1412be6..761a6b97b5c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -775,7 +775,7 @@ void drbd_resume_io(struct drbd_conf *mdev) enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ - sector_t la_size; + sector_t la_size, u_size; sector_t size; char ppb[10]; @@ -803,7 +803,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds /* TODO: should only be some assert here, not (re)init... */ drbd_md_set_sector_offsets(mdev, mdev->ldev); - size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED); + u_size = mdev->ldev->dc.disk_size; + size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED); if (drbd_get_capacity(mdev->this_bdev) != size || drbd_bm_capacity(mdev) != size) { @@ -866,12 +867,12 @@ out: } sector_t -drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space) +drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + sector_t u_size, int assume_peer_has_space) { sector_t p_size = mdev->p_size; /* partner's disk size. */ sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ sector_t m_size; /* my size */ - sector_t u_size = bdev->dc.disk_size; /* size requested by user. 
*/ sector_t size = 0; m_size = drbd_get_max_capacity(bdev); @@ -1406,7 +1407,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Prevent shrinking of consistent devices ! */ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { + drbd_new_dev_size(mdev, nbc, nbc->dc.disk_size, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); retcode = ERR_DISK_TO_SMALL; goto force_diskless_dec; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 567d64b9906..23614e40254 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3379,7 +3379,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. */ - if (drbd_new_dev_size(mdev, mdev->ldev, 0) < + if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) < drbd_get_capacity(mdev->this_bdev) && mdev->state.disk >= D_OUTDATED && mdev->state.conn < C_CONNECTED) { From dc97b70801667ea8b1432b37f5c122405c8d6f96 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 14:27:15 +0200 Subject: [PATCH 359/609] drbd: Split drbd_alter_sa() into drbd_sync_after_valid() and drbd_sync_after_changed() Preparing RCU for disk_conf Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++- drivers/block/drbd/drbd_nl.c | 15 +++++++-------- drivers/block/drbd/drbd_worker.c | 22 ++++++++-------------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8c7e3409c43..f8d0ac38685 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1409,7 +1409,8 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -extern int 
drbd_alter_sa(struct drbd_conf *mdev, int na); +enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor); +void drbd_sync_after_changed(struct drbd_conf *mdev); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 761a6b97b5c..a1854e3aa15 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1145,13 +1145,6 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (!expect(new_disk_conf->al_extents <= DRBD_AL_EXTENTS_MAX)) new_disk_conf->al_extents = DRBD_AL_EXTENTS_MAX; - /* most sanity checks done, try to assign the new sync-after - * dependency. need to hold the global lock in there, - * to avoid a race in the dependency loop check. */ - retcode = drbd_alter_sa(mdev, new_disk_conf->resync_after); - if (retcode != NO_ERROR) - goto fail; - fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); @@ -1185,7 +1178,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) * To avoid someone looking at a half-updated struct, we probably * should have a rw-semaphor on net_conf and disk_conf. 
*/ - mdev->ldev->dc = *new_disk_conf; + write_lock_irq(&global_state_lock); + retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after); + if (retcode == NO_ERROR) { + mdev->ldev->dc = *new_disk_conf; + drbd_sync_after_changed(mdev); + } + write_unlock_irq(&global_state_lock); drbd_md_sync(mdev); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0da1547bb2d..5b645e10708 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1401,7 +1401,8 @@ void suspend_other_sg(struct drbd_conf *mdev) write_unlock_irq(&global_state_lock); } -static int sync_after_error(struct drbd_conf *mdev, int o_minor) +/* caller must hold global_state_lock */ +enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor) { struct drbd_conf *odev; @@ -1425,22 +1426,15 @@ static int sync_after_error(struct drbd_conf *mdev, int o_minor) } } -int drbd_alter_sa(struct drbd_conf *mdev, int na) +/* caller must hold global_state_lock */ +void drbd_sync_after_changed(struct drbd_conf *mdev) { int changes; - int retcode; - write_lock_irq(&global_state_lock); - retcode = sync_after_error(mdev, na); - if (retcode == NO_ERROR) { - mdev->ldev->dc.resync_after = na; - do { - changes = _drbd_pause_after(mdev); - changes |= _drbd_resume_next(mdev); - } while (changes); - } - write_unlock_irq(&global_state_lock); - return retcode; + do { + changes = _drbd_pause_after(mdev); + changes |= _drbd_resume_next(mdev); + } while (changes); } void drbd_rs_controller_reset(struct drbd_conf *mdev) From 2ec91e0e29a3f1166ad96dbb3466b57dbc5d8ebf Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 14:58:00 +0200 Subject: [PATCH 360/609] drbd: Renamed (old|new)_conf into (old|new)_net_conf in receive_SyncParam Preparing RCU for disk_conf Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 
deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 23614e40254..bba0050f836 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3146,7 +3146,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; - struct net_conf *old_conf, *new_conf = NULL; + struct net_conf *old_net_conf, *new_net_conf = NULL; const int apv = tconn->agreed_pro_version; int *rs_plan_s = NULL; int fifo_size = 0; @@ -3222,12 +3222,12 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } mutex_lock(&mdev->tconn->conf_update); - old_conf = mdev->tconn->net_conf; + old_net_conf = mdev->tconn->net_conf; - if (strcmp(old_conf->verify_alg, p->verify_alg)) { + if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", - old_conf->verify_alg, p->verify_alg); + old_net_conf->verify_alg, p->verify_alg); goto disconnect; } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3238,10 +3238,10 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } } - if (apv >= 89 && strcmp(old_conf->csums_alg, p->csums_alg)) { + if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", - old_conf->csums_alg, p->csums_alg); + old_net_conf->csums_alg, p->csums_alg); goto disconnect; } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3272,34 +3272,34 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } if (verify_tfm || csums_tfm) { - new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); - if (!new_conf) { + new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_net_conf) { dev_err(DEV, "Allocation of new net_conf failed\n"); goto disconnect; } - *new_conf = *old_conf; + *new_net_conf = *old_net_conf; if (verify_tfm) { - strcpy(new_conf->verify_alg, p->verify_alg); - new_conf->verify_alg_len = strlen(p->verify_alg) + 1; + strcpy(new_net_conf->verify_alg, p->verify_alg); + new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; crypto_free_hash(mdev->tconn->verify_tfm); mdev->tconn->verify_tfm = verify_tfm; dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(new_conf->csums_alg, p->csums_alg); - new_conf->csums_alg_len = strlen(p->csums_alg) + 1; + strcpy(new_net_conf->csums_alg, p->csums_alg); + new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; crypto_free_hash(mdev->tconn->csums_tfm); mdev->tconn->csums_tfm = csums_tfm; dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); } - rcu_assign_pointer(tconn->net_conf, new_conf); + rcu_assign_pointer(tconn->net_conf, new_net_conf); } mutex_unlock(&mdev->tconn->conf_update); - if (new_conf) { + if (new_net_conf) { synchronize_rcu(); - kfree(old_conf); + kfree(old_net_conf); } spin_lock(&mdev->peer_seq_lock); From 563e4cf25ec804eb02cd30a41baa2fcc6c06679b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 4 May 2011 10:33:52 +0200 Subject: [PATCH 361/609] drbd: Introduce __s32_field in the genetlink macro magic ...and drop explicit typecasts (int)meta_dev_idx < 0. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 ++++---- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_struct.h | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a1854e3aa15..b8ea4807c98 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1253,7 +1253,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -1289,7 +1289,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) */ bdev = blkdev_get_by_path(nbc->dc.meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - ((int)nbc->dc.meta_dev_idx < 0) ? + (nbc->dc.meta_dev_idx < 0) ? (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, @@ -1325,7 +1325,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - if ((int)nbc->dc.meta_dev_idx < 0) { + if (nbc->dc.meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); @@ -1356,7 +1356,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - if ((int)nbc->dc.meta_dev_idx >= 0) + if (nbc->dc.meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 549800668cb..f143e3c0f33 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -102,7 +102,7 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, 
GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) - __u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + __s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) /* use the resize command to try and change the disk_size */ __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f2c7cc7831d..ddbdd0a2447 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -97,6 +97,9 @@ enum { #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ nla_get_u32, NLA_PUT_U32) +#define __s32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ + nla_get_u32, NLA_PUT_U32) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ nla_get_u64, NLA_PUT_U64) From daeda1cca91d58bb6c8e45f6734f021bab9c28b7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 15:00:55 +0200 Subject: [PATCH 362/609] drbd: RCU for disk_conf Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 54 +++++++++-- drivers/block/drbd/drbd_main.c | 25 +++-- drivers/block/drbd/drbd_nl.c | 144 +++++++++++++++++++---------- drivers/block/drbd/drbd_receiver.c | 140 ++++++++++++++++++---------- drivers/block/drbd/drbd_state.c | 16 +++- drivers/block/drbd/drbd_worker.c | 38 +++++--- 6 files changed, 283 insertions(+), 134 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f8d0ac38685..cd77dd497b9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -777,7 +777,7 @@ struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; struct drbd_md md; - struct 
disk_conf dc; /* The user provided config... */ + struct disk_conf *disk_conf; /* RCU, for updates: mdev->tconn->conf_update */ sector_t known_size; /* last known size of that backing device */ }; @@ -1644,8 +1644,13 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) { - switch (mdev->ldev->dc.on_io_error) { - case EP_PASS_ON: + enum drbd_io_error_p ep; + + rcu_read_lock(); + ep = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + rcu_read_unlock(); + switch (ep) { + case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ if (!forcedetach) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); @@ -1694,9 +1699,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf *mdev, * BTW, for internal meta data, this happens to be the maximum capacity * we could agree upon with our peer node. */ -static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) +static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct drbd_backing_dev *bdev) { - switch (bdev->dc.meta_dev_idx) { + switch (meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: return bdev->md.md_offset + bdev->md.bm_offset; @@ -1706,13 +1711,30 @@ static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) } } +static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) +{ + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + return _drbd_md_first_sector(meta_dev_idx, bdev); +} + /** * drbd_md_last_sector() - Return the last sector number of the meta data area * @bdev: Meta data block device. 
*/ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) { - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + switch (meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: return bdev->md.md_offset + MD_AL_OFFSET - 1; @@ -1740,12 +1762,18 @@ static inline sector_t drbd_get_capacity(struct block_device *bdev) static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) { sector_t s; - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + switch (meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: s = drbd_get_capacity(bdev->backing_bdev) ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, - drbd_md_first_sector(bdev)) + _drbd_md_first_sector(meta_dev_idx, bdev)) : 0; break; case DRBD_MD_INDEX_FLEX_EXT: @@ -1771,9 +1799,15 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + switch (meta_dev_idx) { default: /* external, some index */ - return MD_RESERVED_SECT * bdev->dc.meta_dev_idx; + return MD_RESERVED_SECT * meta_dev_idx; case DRBD_MD_INDEX_INTERNAL: /* with drbd08, internal meta data is always "flexible" */ case DRBD_MD_INDEX_FLEX_INT: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e37244485d7..de6afa75dec 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -866,6 +866,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev) const int apv = mdev->tconn->agreed_pro_version; enum drbd_packet cmd; struct net_conf *nc; + struct disk_conf 
*dc; sock = &mdev->tconn->data; p = drbd_prepare_command(mdev, sock); @@ -887,11 +888,12 @@ int drbd_send_sync_param(struct drbd_conf *mdev) memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); if (get_ldev(mdev)) { - p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate); - p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead); - p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target); - p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target); - p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate); + dc = rcu_dereference(mdev->ldev->disk_conf); + p->rate = cpu_to_be32(dc->resync_rate); + p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); + p->c_delay_target = cpu_to_be32(dc->c_delay_target); + p->c_fill_target = cpu_to_be32(dc->c_fill_target); + p->c_max_rate = cpu_to_be32(dc->c_max_rate); put_ldev(mdev); } else { p->rate = cpu_to_be32(DRBD_RATE_DEF); @@ -1056,7 +1058,9 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl if (get_ldev_if_state(mdev, D_NEGOTIATING)) { D_ASSERT(mdev->ldev->backing_bdev); d_size = drbd_get_max_capacity(mdev->ldev); - u_size = mdev->ldev->dc.disk_size; + rcu_read_lock(); + u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); q_order_type = drbd_queue_order_type(mdev); max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE); @@ -2889,7 +2893,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); - bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); spin_lock_irq(&mdev->tconn->req_lock); @@ -2901,8 +2904,12 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - if (bdev->dc.al_extents < 7) - 
bdev->dc.al_extents = 127; + mutex_lock(&mdev->tconn->conf_update); + /* This blocks wants to be get removed... */ + bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents); + if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) + bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; + mutex_unlock(&mdev->tconn->conf_update); err: mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b8ea4807c98..ea62838e079 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -384,7 +384,8 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (get_ldev_if_state(mdev, D_CONSISTENT)) { - fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing); + fp = max_t(enum drbd_fencing_p, fp, + rcu_dereference(mdev->ldev->disk_conf)->fencing); put_ldev(mdev); } } @@ -678,7 +679,12 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { sector_t md_size_sect = 0; - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + + switch (meta_dev_idx) { default: /* v07 style fixed size indexed meta data */ bdev->md.md_size_sect = MD_RESERVED_SECT; @@ -713,6 +719,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET; break; } + rcu_read_unlock(); } /* input size is expected to be in KB */ @@ -803,7 +810,9 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds /* TODO: should only be some assert here, not (re)init... 
*/ drbd_md_set_sector_offsets(mdev, mdev->ldev); - u_size = mdev->ldev->dc.disk_size; + rcu_read_lock(); + u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED); if (drbd_get_capacity(mdev->this_bdev) != size || @@ -979,7 +988,9 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - max_segments = mdev->ldev->dc.max_bio_bvecs; + rcu_read_lock(); + max_segments = rcu_dereference(mdev->ldev->disk_conf)->max_bio_bvecs; + rcu_read_unlock(); put_ldev(mdev); } @@ -1095,7 +1106,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; struct drbd_conf *mdev; - struct disk_conf *new_disk_conf; + struct disk_conf *new_disk_conf, *old_disk_conf; int err, fifo_size; int *rs_plan_s = NULL; @@ -1114,19 +1125,15 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) goto out; } -/* FIXME freeze IO, cluster wide. - * - * We should make sure no-one uses - * some half-updated struct when we - * assign it later. 
*/ - - new_disk_conf = kmalloc(sizeof(*new_disk_conf), GFP_KERNEL); + new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL); if (!new_disk_conf) { retcode = ERR_NOMEM; goto fail; } - memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf)); + mutex_lock(&mdev->tconn->conf_update); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; if (should_set_defaults(info)) set_disk_conf_defaults(new_disk_conf); @@ -1151,7 +1158,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); retcode = ERR_NOMEM; - goto fail; + goto fail_unlock; } } @@ -1171,31 +1178,37 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (err) { retcode = ERR_NOMEM; - goto fail; + goto fail_unlock; } - /* FIXME - * To avoid someone looking at a half-updated struct, we probably - * should have a rw-semaphor on net_conf and disk_conf. - */ write_lock_irq(&global_state_lock); retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after); if (retcode == NO_ERROR) { - mdev->ldev->dc = *new_disk_conf; + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); drbd_sync_after_changed(mdev); } write_unlock_irq(&global_state_lock); - drbd_md_sync(mdev); + if (retcode != NO_ERROR) + goto fail_unlock; + drbd_md_sync(mdev); if (mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev); + mutex_unlock(&mdev->tconn->conf_update); + synchronize_rcu(); + kfree(old_disk_conf); + goto success; + +fail_unlock: + mutex_unlock(&mdev->tconn->conf_update); fail: - put_ldev(mdev); kfree(new_disk_conf); kfree(rs_plan_s); +success: + put_ldev(mdev); out: drbd_adm_finish(info, retcode); return 0; @@ -1210,6 +1223,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) sector_t max_possible_sectors; sector_t min_md_device_sectors; struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ + struct disk_conf *new_disk_conf = NULL; struct block_device *bdev; struct 
lru_cache *resync_lru = NULL; union drbd_state ns, os; @@ -1243,17 +1257,22 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) retcode = ERR_NOMEM; goto fail; } + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + retcode = ERR_NOMEM; + goto fail; + } + nbc->disk_conf = new_disk_conf; - set_disk_conf_defaults(&nbc->dc); - - err = disk_conf_from_attrs(&nbc->dc, info); + set_disk_conf_defaults(new_disk_conf); + err = disk_conf_from_attrs(new_disk_conf, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -1261,7 +1280,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); nc = rcu_dereference(mdev->tconn->net_conf); if (nc) { - if (nbc->dc.fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { + if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { rcu_read_unlock(); retcode = ERR_STONITH_AND_PROT_A; goto fail; @@ -1269,10 +1288,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } rcu_read_unlock(); - bdev = blkdev_get_by_path(nbc->dc.backing_dev, + bdev = blkdev_get_by_path(new_disk_conf->backing_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); if (IS_ERR(bdev)) { - dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, + dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev, PTR_ERR(bdev)); retcode = ERR_OPEN_DISK; goto fail; @@ -1287,12 +1306,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * should check it for you already; but if you don't, or * someone fooled it, we need to double check here) */ - bdev = blkdev_get_by_path(nbc->dc.meta_dev, + bdev = blkdev_get_by_path(new_disk_conf->meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - (nbc->dc.meta_dev_idx < 
0) ? + (new_disk_conf->meta_dev_idx < 0) ? (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { - dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, + dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev, PTR_ERR(bdev)); retcode = ERR_OPEN_MD_DISK; goto fail; @@ -1300,8 +1319,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) nbc->md_bdev = bdev; if ((nbc->backing_bdev == nbc->md_bdev) != - (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || - nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { + (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -1317,21 +1336,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ drbd_md_set_sector_offsets(mdev, nbc); - if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) { + if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), - (unsigned long long) nbc->dc.disk_size); + (unsigned long long) new_disk_conf->disk_size); retcode = ERR_DISK_TO_SMALL; goto fail; } - if (nbc->dc.meta_dev_idx < 0) { + if (new_disk_conf->meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); } else { max_possible_sectors = DRBD_MAX_SECTORS; - min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1); + min_md_device_sectors = MD_RESERVED_SECT * (new_disk_conf->meta_dev_idx + 1); } if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { @@ -1356,7 +1375,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - 
if (nbc->dc.meta_dev_idx >= 0) + if (new_disk_conf->meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } @@ -1399,14 +1418,14 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } /* Since we are diskless, fix the activity log first... */ - if (drbd_check_al_size(mdev, &nbc->dc)) { + if (drbd_check_al_size(mdev, new_disk_conf)) { retcode = ERR_NOMEM; goto force_diskless_dec; } /* Prevent shrinking of consistent devices ! */ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(mdev, nbc, nbc->dc.disk_size, 0) < nbc->md.la_size_sect) { + drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); retcode = ERR_DISK_TO_SMALL; goto force_diskless_dec; @@ -1419,11 +1438,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ - if (nbc->dc.no_md_flush) + if (new_disk_conf->no_md_flush) set_bit(MD_NO_FUA, &mdev->flags); else clear_bit(MD_NO_FUA, &mdev->flags); + /* FIXME Missing stuff: rs_plan_s, clip al range */ + /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. 
* now mdev takes over responsibility, and the state engine should @@ -1433,6 +1454,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) mdev->resync = resync_lru; nbc = NULL; resync_lru = NULL; + new_disk_conf = NULL; mdev->write_ordering = WO_bdev_flush; drbd_bump_write_ordering(mdev, WO_bdev_flush); @@ -1530,9 +1552,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED)) ns.pdsk = D_OUTDATED; - if ( ns.disk == D_CONSISTENT && - (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE)) + rcu_read_lock(); + if (ns.disk == D_CONSISTENT && + (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; + rcu_read_unlock(); /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before @@ -1589,6 +1613,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) FMODE_READ | FMODE_WRITE | FMODE_EXCL); kfree(nbc); } + kfree(new_disk_conf); lc_destroy(resync_lru); finish: @@ -1691,7 +1716,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n idr_for_each_entry(&tconn->volumes, mdev, i) { if (get_ldev(mdev)) { - enum drbd_fencing_p fp = mdev->ldev->dc.fencing; + enum drbd_fencing_p fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; put_ldev(mdev); if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) return ERR_STONITH_AND_PROT_A; @@ -2159,11 +2184,13 @@ void resync_after_online_grow(struct drbd_conf *mdev) int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) { + struct disk_conf *old_disk_conf, *new_disk_conf = NULL; struct resize_parms rs; struct drbd_conf *mdev; enum drbd_ret_code retcode; enum determine_dev_size dd; enum dds_flags ddsf; + sector_t u_size; int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -2204,10 +2231,31 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) 
goto fail; } + rcu_read_lock(); + u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); + if (u_size != (sector_t)rs.resize_size) { + new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + retcode = ERR_NOMEM; + goto fail; + } + } + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); - mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; + if (new_disk_conf) { + mutex_lock(&mdev->tconn->conf_update); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; + new_disk_conf->disk_size = (sector_t)rs.resize_size; + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + mutex_unlock(&mdev->tconn->conf_update); + synchronize_rcu(); + kfree(old_disk_conf); + } + ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); dd = drbd_determine_dev_size(mdev, ddsf); drbd_md_sync(mdev); @@ -2501,11 +2549,11 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) goto nla_put_failure; + rcu_read_lock(); if (got_ldev) - if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) + if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive)) goto nla_put_failure; - rcu_read_lock(); nc = rcu_dereference(mdev->tconn->net_conf); if (nc) err = net_conf_to_skb(skb, nc, exclude_sensitive); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bba0050f836..add41764ec5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1166,6 +1166,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, */ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local) { + struct disk_conf *dc; enum write_ordering_e pwo; static char *write_ordering_str[] = { 
[WO_none] = "none", @@ -1175,10 +1176,14 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) pwo = mdev->write_ordering; wo = min(pwo, wo); - if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush) + rcu_read_lock(); + dc = rcu_dereference(mdev->ldev->disk_conf); + + if (wo == WO_bdev_flush && dc->no_disk_flush) wo = WO_drain_io; - if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain) + if (wo == WO_drain_io && dc->no_disk_drain) wo = WO_none; + rcu_read_unlock(); mdev->write_ordering = wo; if (pwo != mdev->write_ordering || wo == WO_bdev_flush) dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); @@ -2190,9 +2195,14 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) struct lc_element *tmp; int curr_events; int throttle = 0; + unsigned int c_min_rate; + + rcu_read_lock(); + c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate; + rcu_read_unlock(); /* feature disabled? */ - if (mdev->ldev->dc.c_min_rate == 0) + if (c_min_rate == 0) return 0; spin_lock_irq(&mdev->al_lock); @@ -2232,7 +2242,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) db = mdev->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); - if (dbdt > mdev->ldev->dc.c_min_rate) + if (dbdt > c_min_rate) throttle = 1; } return throttle; @@ -3147,6 +3157,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; struct net_conf *old_net_conf, *new_net_conf = NULL; + struct disk_conf *old_disk_conf, *new_disk_conf = NULL; const int apv = tconn->agreed_pro_version; int *rs_plan_s = NULL; int fifo_size = 0; @@ -3189,24 +3200,34 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) if (err) return err; - if (get_ldev(mdev)) { - mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); - put_ldev(mdev); + new_disk_conf = kzalloc(sizeof(struct disk_conf), 
GFP_KERNEL); + if (!new_disk_conf) { + dev_err(DEV, "Allocation of new disk_conf failed\n"); + return -ENOMEM; } + mutex_lock(&mdev->tconn->conf_update); + old_net_conf = mdev->tconn->net_conf; + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; + + new_disk_conf->resync_rate = be32_to_cpu(p->rate); + if (apv >= 88) { if (apv == 88) { if (data_size > SHARED_SECRET_MAX) { dev_err(DEV, "verify-alg too long, " "peer wants %u, accepting only %u byte\n", data_size, SHARED_SECRET_MAX); + mutex_unlock(&mdev->tconn->conf_update); return -EIO; } err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size); - if (err) + if (err) { + mutex_unlock(&mdev->tconn->conf_update); return err; - + } /* we expect NUL terminated string */ /* but just in case someone tries to be evil */ D_ASSERT(p->verify_alg[data_size-1] == 0); @@ -3221,9 +3242,6 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - mutex_lock(&mdev->tconn->conf_update); - old_net_conf = mdev->tconn->net_conf; - if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. 
me=\"%s\" peer=\"%s\"\n", @@ -3252,14 +3270,13 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } } - if (apv > 94 && get_ldev(mdev)) { - mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); - mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); - mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target); - mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target); - mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate); + if (apv > 94) { + new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); + new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); + new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); + new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); - fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; + fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); if (!rs_plan_s) { @@ -3268,7 +3285,6 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect; } } - put_ldev(mdev); } if (verify_tfm || csums_tfm) { @@ -3296,21 +3312,24 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } rcu_assign_pointer(tconn->net_conf, new_net_conf); } - mutex_unlock(&mdev->tconn->conf_update); - if (new_net_conf) { - synchronize_rcu(); - kfree(old_net_conf); - } - - spin_lock(&mdev->peer_seq_lock); - if (fifo_size != mdev->rs_plan_s.size) { - kfree(mdev->rs_plan_s.values); - mdev->rs_plan_s.values = rs_plan_s; - mdev->rs_plan_s.size = fifo_size; - mdev->rs_planed = 0; - } - spin_unlock(&mdev->peer_seq_lock); } + + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + spin_lock(&mdev->peer_seq_lock); + if (rs_plan_s) { + kfree(mdev->rs_plan_s.values); + mdev->rs_plan_s.values = rs_plan_s; + mdev->rs_plan_s.size = fifo_size; + mdev->rs_planed = 0; + } + spin_unlock(&mdev->peer_seq_lock); + + 
mutex_unlock(&mdev->tconn->conf_update); + synchronize_rcu(); + if (new_net_conf) + kfree(old_net_conf); + kfree(old_disk_conf); + return 0; disconnect: @@ -3358,37 +3377,56 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) mdev->p_size = p_size; if (get_ldev(mdev)) { + rcu_read_lock(); + my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); + warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->ldev)); warn_if_differ_considerably(mdev, "user requested size", - p_usize, mdev->ldev->dc.disk_size); + p_usize, my_usize); /* if this is the first connect, or an otherwise expected * param exchange, choose the minimum */ if (mdev->state.conn == C_WF_REPORT_PARAMS) - p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size, - p_usize); - - my_usize = mdev->ldev->dc.disk_size; - - if (mdev->ldev->dc.disk_size != p_usize) { - mdev->ldev->dc.disk_size = p_usize; - dev_info(DEV, "Peer sets u_size to %lu sectors\n", - (unsigned long)mdev->ldev->dc.disk_size); - } + p_usize = min_not_zero(my_usize, p_usize); /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. 
*/ if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) < - drbd_get_capacity(mdev->this_bdev) && - mdev->state.disk >= D_OUTDATED && - mdev->state.conn < C_CONNECTED) { + drbd_get_capacity(mdev->this_bdev) && + mdev->state.disk >= D_OUTDATED && + mdev->state.conn < C_CONNECTED) { dev_err(DEV, "The peer's disk size is too small!\n"); conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); - mdev->ldev->dc.disk_size = my_usize; put_ldev(mdev); return -EIO; } + + if (my_usize != p_usize) { + struct disk_conf *old_disk_conf, *new_disk_conf = NULL; + + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + dev_err(DEV, "Allocation of new disk_conf failed\n"); + put_ldev(mdev); + return -ENOMEM; + } + + mutex_lock(&mdev->tconn->conf_update); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; + new_disk_conf->disk_size = p_usize; + + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + mutex_unlock(&mdev->tconn->conf_update); + synchronize_rcu(); + kfree(old_disk_conf); + + dev_info(DEV, "Peer sets u_size to %lu sectors\n", + (unsigned long)my_usize); + } + put_ldev(mdev); } @@ -4268,7 +4306,9 @@ static int drbd_disconnected(int vnr, void *p, void *data) fp = FP_DONT_CARE; if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; + rcu_read_lock(); + fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; + rcu_read_unlock(); put_ldev(mdev); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 332781cfb55..cd55f46d5c5 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -483,13 +483,13 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) enum drbd_state_rv rv = SS_SUCCESS; struct net_conf *nc; + rcu_read_lock(); fp = FP_DONT_CARE; if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; + fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; put_ldev(mdev); } - rcu_read_lock(); nc = rcu_dereference(mdev->tconn->net_conf); if (nc) 
{ if (!nc->two_primaries && ns.role == R_PRIMARY) { @@ -674,7 +674,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state fp = FP_DONT_CARE; if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; + rcu_read_lock(); + fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; + rcu_read_unlock(); put_ldev(mdev); } @@ -1132,7 +1134,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, fp = FP_DONT_CARE; if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; + rcu_read_lock(); + fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; + rcu_read_unlock(); put_ldev(mdev); } @@ -1287,7 +1291,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* corresponding get_ldev was in __drbd_set_state, to serialize * our cleanup here with the transition to D_DISKLESS, * so it is safe to dreference ldev here. */ - eh = mdev->ldev->dc.on_io_error; + rcu_read_lock(); + eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + rcu_read_unlock(); was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); /* current state still has to be D_FAILED, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5b645e10708..4f45f75173a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -436,6 +436,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value) static int drbd_rs_controller(struct drbd_conf *mdev) { + struct disk_conf *dc; unsigned int sect_in; /* Number of sectors that came in since the last turn */ unsigned int want; /* The number of sectors we want in the proxy */ int req_sect; /* Number of sectors to request in this turn */ @@ -449,14 +450,16 @@ static int drbd_rs_controller(struct drbd_conf *mdev) mdev->rs_in_flight -= sect_in; spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ + rcu_read_lock(); + dc = rcu_dereference(mdev->ldev->disk_conf); - steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 
* SLEEP_TIME) / HZ; */ + steps = mdev->rs_plan_s.size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ - want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps; + want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; } else { /* normal path */ - want = mdev->ldev->dc.c_fill_target ? mdev->ldev->dc.c_fill_target : - sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10); + want = dc->c_fill_target ? dc->c_fill_target : + sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); } correction = want - mdev->rs_in_flight - mdev->rs_planed; @@ -468,14 +471,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev) /* What we do in this step */ curr_corr = fifo_push(&mdev->rs_plan_s, 0); - spin_unlock(&mdev->peer_seq_lock); mdev->rs_planed -= curr_corr; req_sect = sect_in + curr_corr; if (req_sect < 0) req_sect = 0; - max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ; + max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ; if (req_sect > max_sect) req_sect = max_sect; @@ -484,6 +486,8 @@ static int drbd_rs_controller(struct drbd_conf *mdev) sect_in, mdev->rs_in_flight, want, correction, steps, cps, mdev->rs_planed, curr_corr, req_sect); */ + rcu_read_unlock(); + spin_unlock(&mdev->peer_seq_lock); return req_sect; } @@ -491,11 +495,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */ + if (mdev->rs_plan_s.size) { /* rcu_dereference(mdev->ldev->disk_conf)->c_plan_ahead */ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - mdev->c_sync_rate = mdev->ldev->dc.resync_rate; + rcu_read_lock(); + mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate; + rcu_read_unlock(); number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 
1024) * HZ); } @@ -1320,13 +1326,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) static int _drbd_may_sync_now(struct drbd_conf *mdev) { struct drbd_conf *odev = mdev; + int ra; while (1) { if (!odev->ldev) return 1; - if (odev->ldev->dc.resync_after == -1) + rcu_read_lock(); + ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + rcu_read_unlock(); + if (ra == -1) return 1; - odev = minor_to_mdev(odev->ldev->dc.resync_after); + odev = minor_to_mdev(ra); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1405,6 +1415,7 @@ void suspend_other_sg(struct drbd_conf *mdev) enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor) { struct drbd_conf *odev; + int ra; if (o_minor == -1) return NO_ERROR; @@ -1417,12 +1428,15 @@ enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor) if (odev == mdev) return ERR_SYNC_AFTER_CYCLE; + rcu_read_lock(); + ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + rcu_read_unlock(); /* dependency chain ends here, no cycles. */ - if (odev->ldev->dc.resync_after == -1) + if (ra == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(odev->ldev->dc.resync_after); + odev = minor_to_mdev(ra); } } From 9958c857c760eec76f4fdf288b6f33a1c3b41833 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 16:19:31 +0200 Subject: [PATCH 363/609] drbd: Made the fifo object a self contained object (preparing for RCU) * Moved rs_planed into it, named total * When having a pointer to the object the values can be embedded into the fifo object. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 +++--- drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_nl.c | 34 +++++++++++++++++++----------- drivers/block/drbd/drbd_receiver.c | 13 ++++++------ drivers/block/drbd/drbd_worker.c | 33 +++++++++++++++++++++-------- 5 files changed, 57 insertions(+), 31 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index cd77dd497b9..2ecee6cd2bd 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -802,10 +802,12 @@ enum write_ordering_e { }; struct fifo_buffer { - int *values; unsigned int head_index; unsigned int size; + int total; /* sum of all values */ + int values[0]; }; +extern struct fifo_buffer *fifo_alloc(int fifo_size); /* flag bits per tconn */ enum { @@ -996,9 +998,8 @@ struct drbd_conf { int rs_last_events; /* counter of read or write "events" (unit sectors) * on the lower level device when we last looked. */ int c_sync_rate; /* current resync rate after syncer throttle magic */ - struct fifo_buffer rs_plan_s; /* correction values of resync planer */ + struct fifo_buffer *rs_plan_s; /* correction values of resync planer */ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ - int rs_planed; /* resync sectors already planned */ atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ int peer_max_bio_size; int local_max_bio_size; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index de6afa75dec..240319ce865 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2310,6 +2310,7 @@ void drbd_delete_device(struct drbd_conf *mdev) __free_page(mdev->md_io_page); put_disk(mdev->vdisk); blk_cleanup_queue(mdev->rq_queue); + kfree(mdev->rs_plan_s); kfree(mdev); kref_put(&tconn->kref, &conn_destroy); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 
ea62838e079..7fda3d9445c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1107,8 +1107,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; struct drbd_conf *mdev; struct disk_conf *new_disk_conf, *old_disk_conf; + struct fifo_buffer *rs_plan_s = NULL; int err, fifo_size; - int *rs_plan_s = NULL; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1153,8 +1153,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) new_disk_conf->al_extents = DRBD_AL_EXTENTS_MAX; fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { - rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + if (fifo_size != mdev->rs_plan_s->size) { + rs_plan_s = fifo_alloc(fifo_size); if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); retcode = ERR_NOMEM; @@ -1162,14 +1162,6 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } } - if (fifo_size != mdev->rs_plan_s.size) { - kfree(mdev->rs_plan_s.values); - mdev->rs_plan_s.values = rs_plan_s; - mdev->rs_plan_s.size = fifo_size; - mdev->rs_planed = 0; - rs_plan_s = NULL; - } - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); drbd_al_shrink(mdev); err = drbd_check_al_size(mdev, new_disk_conf); @@ -1192,6 +1184,14 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail_unlock; + spin_lock(&mdev->peer_seq_lock); + if (rs_plan_s) { + kfree(mdev->rs_plan_s); + mdev->rs_plan_s = rs_plan_s; + rs_plan_s = NULL; + } + spin_unlock(&mdev->peer_seq_lock); + drbd_md_sync(mdev); if (mdev->state.conn >= C_CONNECTED) @@ -1226,6 +1226,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) struct disk_conf *new_disk_conf = NULL; struct block_device *bdev; struct lru_cache *resync_lru = NULL; + struct fifo_buffer *new_plan = NULL; union drbd_state ns, os; enum 
drbd_state_rv rv; struct net_conf *nc; @@ -1272,6 +1273,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } + new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ); + if (!new_plan) { + retcode = ERR_NOMEM; + goto fail; + } + if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto fail; @@ -1443,7 +1450,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) else clear_bit(MD_NO_FUA, &mdev->flags); - /* FIXME Missing stuff: rs_plan_s, clip al range */ + /* FIXME Missing stuff: clip al range */ /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. @@ -1452,9 +1459,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) D_ASSERT(mdev->ldev == NULL); mdev->ldev = nbc; mdev->resync = resync_lru; + mdev->rs_plan_s = new_plan; nbc = NULL; resync_lru = NULL; new_disk_conf = NULL; + new_plan = NULL; mdev->write_ordering = WO_bdev_flush; drbd_bump_write_ordering(mdev, WO_bdev_flush); @@ -1615,6 +1624,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } kfree(new_disk_conf); lc_destroy(resync_lru); + kfree(new_plan); finish: drbd_adm_finish(info, retcode); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index add41764ec5..19b421f44ff 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3159,7 +3159,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) struct net_conf *old_net_conf, *new_net_conf = NULL; struct disk_conf *old_disk_conf, *new_disk_conf = NULL; const int apv = tconn->agreed_pro_version; - int *rs_plan_s = NULL; + struct fifo_buffer *rs_plan_s = NULL; int fifo_size = 0; int err; @@ -3277,8 +3277,8 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); fifo_size = (new_disk_conf->c_plan_ahead * 
10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { - rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + if (fifo_size != mdev->rs_plan_s->size) { + rs_plan_s = fifo_alloc(fifo_size); if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); put_ldev(mdev); @@ -3317,10 +3317,8 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); spin_lock(&mdev->peer_seq_lock); if (rs_plan_s) { - kfree(mdev->rs_plan_s.values); - mdev->rs_plan_s.values = rs_plan_s; - mdev->rs_plan_s.size = fifo_size; - mdev->rs_planed = 0; + kfree(mdev->rs_plan_s); + mdev->rs_plan_s = rs_plan_s; } spin_unlock(&mdev->peer_seq_lock); @@ -3333,6 +3331,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) return 0; disconnect: + kfree(rs_plan_s); mutex_unlock(&mdev->tconn->conf_update); /* just for completeness: actually not needed, * as this is not reached if csums_tfm was ok. 
*/ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4f45f75173a..131887b7855 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -434,6 +434,21 @@ static void fifo_add_val(struct fifo_buffer *fb, int value) fb->values[i] += value; } +struct fifo_buffer *fifo_alloc(int fifo_size) +{ + struct fifo_buffer *fb; + + fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL); + if (!fb) + return NULL; + + fb->head_index = 0; + fb->size = fifo_size; + fb->total = 0; + + return fb; +} + static int drbd_rs_controller(struct drbd_conf *mdev) { struct disk_conf *dc; @@ -453,7 +468,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev) rcu_read_lock(); dc = rcu_dereference(mdev->ldev->disk_conf); - steps = mdev->rs_plan_s.size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + steps = mdev->rs_plan_s->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; @@ -462,16 +477,16 @@ static int drbd_rs_controller(struct drbd_conf *mdev) sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); } - correction = want - mdev->rs_in_flight - mdev->rs_planed; + correction = want - mdev->rs_in_flight - mdev->rs_plan_s->total; /* Plan ahead */ cps = correction / steps; - fifo_add_val(&mdev->rs_plan_s, cps); - mdev->rs_planed += cps * steps; + fifo_add_val(mdev->rs_plan_s, cps); + mdev->rs_plan_s->total += cps * steps; /* What we do in this step */ - curr_corr = fifo_push(&mdev->rs_plan_s, 0); - mdev->rs_planed -= curr_corr; + curr_corr = fifo_push(mdev->rs_plan_s, 0); + mdev->rs_plan_s->total -= curr_corr; req_sect = sect_in + curr_corr; if (req_sect < 0) @@ -495,7 +510,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s.size) { /* 
rcu_dereference(mdev->ldev->disk_conf)->c_plan_ahead */ + if (mdev->rs_plan_s->size) { /* rcu_dereference(mdev->ldev->disk_conf)->c_plan_ahead */ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { @@ -1456,9 +1471,9 @@ void drbd_rs_controller_reset(struct drbd_conf *mdev) atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); mdev->rs_in_flight = 0; - mdev->rs_planed = 0; + mdev->rs_plan_s->total = 0; spin_lock(&mdev->peer_seq_lock); - fifo_set(&mdev->rs_plan_s, 0); + fifo_set(mdev->rs_plan_s, 0); spin_unlock(&mdev->peer_seq_lock); } From d589a21e5d9099a6351862ae6a7f4ae5ec4103d4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 4 May 2011 10:06:52 +0200 Subject: [PATCH 364/609] drbd: Enforce limits of disk_conf members; centralized these checks Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7fda3d9445c..812e91f1b6d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -937,9 +937,6 @@ static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) unsigned int in_use; int i; - if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN)) - dc->al_extents = DRBD_AL_EXTENTS_MIN; - if (mdev->act_log && mdev->act_log->nr_elements == dc->al_extents) return 0; @@ -1102,6 +1099,17 @@ static bool should_set_defaults(struct genl_info *info) return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); } +static void enforce_disk_conf_limits(struct disk_conf *dc) +{ + if (dc->al_extents < DRBD_AL_EXTENTS_MIN) + dc->al_extents = DRBD_AL_EXTENTS_MIN; + if (dc->al_extents > DRBD_AL_EXTENTS_MAX) + dc->al_extents = DRBD_AL_EXTENTS_MAX; + + if (dc->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) + dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; +} + int 
drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -1146,11 +1154,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (!expect(new_disk_conf->resync_rate >= 1)) new_disk_conf->resync_rate = 1; - /* clip to allowed range */ - if (!expect(new_disk_conf->al_extents >= DRBD_AL_EXTENTS_MIN)) - new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; - if (!expect(new_disk_conf->al_extents <= DRBD_AL_EXTENTS_MAX)) - new_disk_conf->al_extents = DRBD_AL_EXTENTS_MAX; + enforce_disk_conf_limits(new_disk_conf); fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s->size) { @@ -1273,6 +1277,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } + enforce_disk_conf_limits(new_disk_conf); + new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ); if (!new_plan) { retcode = ERR_NOMEM; @@ -1450,8 +1456,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) else clear_bit(MD_NO_FUA, &mdev->flags); - /* FIXME Missing stuff: clip al range */ - /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. * now mdev takes over responsibility, and the state engine should From 813472ced7fac734157fe5be1137ce2bac942902 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 3 May 2011 16:47:02 +0200 Subject: [PATCH 365/609] drbd: RCU for rs_plan_s This removes the issue with using peer_seq_lock out of different contexts. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 18 ++++---- drivers/block/drbd/drbd_receiver.c | 71 ++++++++++++++++++------------ drivers/block/drbd/drbd_worker.c | 41 ++++++++++------- 4 files changed, 77 insertions(+), 55 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2ecee6cd2bd..3f377e21a2f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -998,7 +998,7 @@ struct drbd_conf { int rs_last_events; /* counter of read or write "events" (unit sectors) * on the lower level device when we last looked. */ int c_sync_rate; /* current resync rate after syncer throttle magic */ - struct fifo_buffer *rs_plan_s; /* correction values of resync planer */ + struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, tconn->conn_update) */ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ int peer_max_bio_size; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 812e91f1b6d..9af097416e2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1115,7 +1115,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; struct drbd_conf *mdev; struct disk_conf *new_disk_conf, *old_disk_conf; - struct fifo_buffer *rs_plan_s = NULL; + struct fifo_buffer *old_plan = NULL, *new_plan = NULL; int err, fifo_size; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -1158,8 +1158,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s->size) { - rs_plan_s = fifo_alloc(fifo_size); - if (!rs_plan_s) { + new_plan = fifo_alloc(fifo_size); + if (!new_plan) { dev_err(DEV, "kmalloc of fifo_buffer failed"); 
retcode = ERR_NOMEM; goto fail_unlock; @@ -1188,13 +1188,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail_unlock; - spin_lock(&mdev->peer_seq_lock); - if (rs_plan_s) { - kfree(mdev->rs_plan_s); - mdev->rs_plan_s = rs_plan_s; - rs_plan_s = NULL; + if (new_plan) { + old_plan = mdev->rs_plan_s; + rcu_assign_pointer(mdev->rs_plan_s, new_plan); } - spin_unlock(&mdev->peer_seq_lock); drbd_md_sync(mdev); @@ -1204,13 +1201,14 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); synchronize_rcu(); kfree(old_disk_conf); + kfree(old_plan); goto success; fail_unlock: mutex_unlock(&mdev->tconn->conf_update); fail: kfree(new_disk_conf); - kfree(rs_plan_s); + kfree(new_plan); success: put_ldev(mdev); out: diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 19b421f44ff..83d39859a9f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3157,9 +3157,9 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; struct net_conf *old_net_conf, *new_net_conf = NULL; - struct disk_conf *old_disk_conf, *new_disk_conf = NULL; + struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; const int apv = tconn->agreed_pro_version; - struct fifo_buffer *rs_plan_s = NULL; + struct fifo_buffer *old_plan = NULL, *new_plan = NULL; int fifo_size = 0; int err; @@ -3200,18 +3200,22 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) if (err) return err; - new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); - if (!new_disk_conf) { - dev_err(DEV, "Allocation of new disk_conf failed\n"); - return -ENOMEM; - } - mutex_lock(&mdev->tconn->conf_update); old_net_conf = mdev->tconn->net_conf; - old_disk_conf = mdev->ldev->disk_conf; - *new_disk_conf = *old_disk_conf; + if 
(get_ldev(mdev)) { + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + put_ldev(mdev); + mutex_unlock(&mdev->tconn->conf_update); + dev_err(DEV, "Allocation of new disk_conf failed\n"); + return -ENOMEM; + } - new_disk_conf->resync_rate = be32_to_cpu(p->rate); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; + + new_disk_conf->resync_rate = be32_to_cpu(p->rate); + } if (apv >= 88) { if (apv == 88) { @@ -3219,15 +3223,13 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) dev_err(DEV, "verify-alg too long, " "peer wants %u, accepting only %u byte\n", data_size, SHARED_SECRET_MAX); - mutex_unlock(&mdev->tconn->conf_update); - return -EIO; + err = -EIO; + goto reconnect; } err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size); - if (err) { - mutex_unlock(&mdev->tconn->conf_update); - return err; - } + if (err) + goto reconnect; /* we expect NUL terminated string */ /* but just in case someone tries to be evil */ D_ASSERT(p->verify_alg[data_size-1] == 0); @@ -3270,7 +3272,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } } - if (apv > 94) { + if (apv > 94 && new_disk_conf) { new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); @@ -3278,8 +3280,8 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s->size) { - rs_plan_s = fifo_alloc(fifo_size); - if (!rs_plan_s) { + new_plan = fifo_alloc(fifo_size); + if (!new_plan) { dev_err(DEV, "kmalloc of fifo_buffer failed"); put_ldev(mdev); goto disconnect; @@ -3314,24 +3316,39 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } } - rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - 
spin_lock(&mdev->peer_seq_lock); - if (rs_plan_s) { - kfree(mdev->rs_plan_s); - mdev->rs_plan_s = rs_plan_s; + if (new_disk_conf) { + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + put_ldev(mdev); + } + + if (new_plan) { + old_plan = mdev->rs_plan_s; + rcu_assign_pointer(mdev->rs_plan_s, new_plan); } - spin_unlock(&mdev->peer_seq_lock); mutex_unlock(&mdev->tconn->conf_update); synchronize_rcu(); if (new_net_conf) kfree(old_net_conf); kfree(old_disk_conf); + kfree(old_plan); return 0; +reconnect: + if (new_disk_conf) { + put_ldev(mdev); + kfree(new_disk_conf); + } + mutex_unlock(&mdev->tconn->conf_update); + return -EIO; + disconnect: - kfree(rs_plan_s); + kfree(new_plan); + if (new_disk_conf) { + put_ldev(mdev); + kfree(new_disk_conf); + } mutex_unlock(&mdev->tconn->conf_update); /* just for completeness: actually not needed, * as this is not reached if csums_tfm was ok. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 131887b7855..e37c42d5dd6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -460,15 +460,15 @@ static int drbd_rs_controller(struct drbd_conf *mdev) int steps; /* Number of time steps to plan ahead */ int curr_corr; int max_sect; + struct fifo_buffer *plan; sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */ mdev->rs_in_flight -= sect_in; - spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ - rcu_read_lock(); dc = rcu_dereference(mdev->ldev->disk_conf); + plan = rcu_dereference(mdev->rs_plan_s); - steps = mdev->rs_plan_s->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; @@ -477,16 +477,16 @@ static int drbd_rs_controller(struct drbd_conf *mdev) sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); } - 
correction = want - mdev->rs_in_flight - mdev->rs_plan_s->total; + correction = want - mdev->rs_in_flight - plan->total; /* Plan ahead */ cps = correction / steps; - fifo_add_val(mdev->rs_plan_s, cps); - mdev->rs_plan_s->total += cps * steps; + fifo_add_val(plan, cps); + plan->total += cps * steps; /* What we do in this step */ - curr_corr = fifo_push(mdev->rs_plan_s, 0); - mdev->rs_plan_s->total -= curr_corr; + curr_corr = fifo_push(plan, 0); + plan->total -= curr_corr; req_sect = sect_in + curr_corr; if (req_sect < 0) @@ -501,8 +501,6 @@ static int drbd_rs_controller(struct drbd_conf *mdev) sect_in, mdev->rs_in_flight, want, correction, steps, cps, mdev->rs_planed, curr_corr, req_sect); */ - rcu_read_unlock(); - spin_unlock(&mdev->peer_seq_lock); return req_sect; } @@ -510,15 +508,16 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s->size) { /* rcu_dereference(mdev->ldev->disk_conf)->c_plan_ahead */ + + rcu_read_lock(); + if (rcu_dereference(mdev->rs_plan_s)->size) { number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - rcu_read_lock(); mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate; - rcu_read_unlock(); number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } + rcu_read_unlock(); /* ignore the amount of pending requests, the resync controller should * throttle down to incoming reply rate soon enough anyways. 
*/ @@ -1468,13 +1467,21 @@ void drbd_sync_after_changed(struct drbd_conf *mdev) void drbd_rs_controller_reset(struct drbd_conf *mdev) { + struct fifo_buffer *plan; + atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); mdev->rs_in_flight = 0; - mdev->rs_plan_s->total = 0; - spin_lock(&mdev->peer_seq_lock); - fifo_set(mdev->rs_plan_s, 0); - spin_unlock(&mdev->peer_seq_lock); + + /* Updating the RCU protected object in place is necessary since + this function gets called from atomic context. + It is valid since all other updates also lead to an completely + empty fifo */ + rcu_read_lock(); + plan = rcu_dereference(mdev->rs_plan_s); + plan->total = 0; + fifo_set(plan, 0); + rcu_read_unlock(); } void start_resync_timer_fn(unsigned long data) From a5d8e1fb9d22851de89bbf52db6b11c56b895dd4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 16:06:51 +0200 Subject: [PATCH 366/609] drbd: Convert boolean flags on netlink from NLA_FLAG to NLA_U8 Flags of type NLA_FLAG are either present or absent, but do not have a value by themselves. Use type NLA_U8 for our boolean flags instead, and use the value to determine if the flag should be on or off. On the drbdsetup command line, all those flags have an optional yes/no argument which defaults to yes. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_struct.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index ddbdd0a2447..b1ddbb5bd72 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -86,8 +86,8 @@ enum { /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ - __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ - nla_get_flag, __nla_put_flag) + __field(attr_nr, attr_flag, name, NLA_U8, char, \ + nla_get_u8, NLA_PUT_U8) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ nla_get_u8, NLA_PUT_U8) @@ -118,12 +118,6 @@ enum { #define __str_field_def(attr_nr, attr_flag, name, maxlen) \ __str_field(attr_nr, attr_flag, name, maxlen) -#define __nla_put_flag(skb, attrtype, value) \ - do { \ - if (value) \ - NLA_PUT_FLAG(skb, attrtype); \ - } while (0) - #define GENL_op_init(args...) args #define GENL_doit(handler) \ .doit = handler, \ From 66b2f6b9c59c5e7003e13281dfe72e174f93988c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: [PATCH 367/609] drbd: Turn no-disk-flushes into disk-flushes={yes|no} Change the --no-disk-flushes drbdsetup command line option as well as the no_disk_flush netlink packet. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 83d39859a9f..e7a6eeae94e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1179,7 +1179,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) rcu_read_lock(); dc = rcu_dereference(mdev->ldev->disk_conf); - if (wo == WO_bdev_flush && dc->no_disk_flush) + if (wo == WO_bdev_flush && !dc->disk_flushes) wo = WO_drain_io; if (wo == WO_drain_io && dc->no_disk_drain) wo = WO_none; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index f143e3c0f33..945c4dd3470 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -122,8 +122,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, no_disk_barrier, 0) - __flg_field_def(17, GENLA_F_MANDATORY, no_disk_flush, 0) + __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) + __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) From d0c980e236243cd03aa2291243587ac1ba3c2b04 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: [PATCH 368/609] drbd: Turn no-disk-drain into disk-drain={yes|no} Change the --no-disk-drain drbdsetup command line option as well as the no_disk_drain netlink packet. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e7a6eeae94e..5d1bdda8ec9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1181,7 +1181,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) if (wo == WO_bdev_flush && !dc->disk_flushes) wo = WO_drain_io; - if (wo == WO_drain_io && dc->no_disk_drain) + if (wo == WO_drain_io && !dc->disk_drain) wo = WO_none; rcu_read_unlock(); mdev->write_ordering = wo; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 945c4dd3470..30ad6600b44 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -124,7 +124,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) - __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) + __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) From e544046ab842ab93c275a6fc4e043c1cb637076d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: [PATCH 369/609] drbd: Turn no-md-flushes into md-flushes={yes|no} Change the --no-md-flushes drbdsetup command line option as well as the no_md_flush netlink packet. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 6 +++--- include/linux/drbd_genl.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9af097416e2..4a946a877bd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1449,10 +1449,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ - if (new_disk_conf->no_md_flush) - set_bit(MD_NO_FUA, &mdev->flags); - else + if (new_disk_conf->md_flushes) clear_bit(MD_NO_FUA, &mdev->flags); + else + set_bit(MD_NO_FUA, &mdev->flags); /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 30ad6600b44..53518fc2315 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -125,7 +125,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) - __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) + __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, 1) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, From bb77d34ecc6fe6cdc3f4f0841a516695c2eacc04 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: [PATCH 370/609] drbd: Turn no-tcp-cork into tcp-cork={yes|no} Change the --no-tcp-cork drbdsetup command line option as well as the no_cork netlink packet. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 2 +- include/linux/drbd_genl.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5d1bdda8ec9..b4858bb7894 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5040,7 +5040,7 @@ int drbd_asender(struct drbd_thread *thi) int expect = header_size; bool ping_timeout_active = false; struct net_conf *nc; - int ping_timeo, no_cork, ping_int; + int ping_timeo, tcp_cork, ping_int; current->policy = SCHED_RR; /* Make this a realtime task! */ current->rt_priority = 2; /* more important than all other tasks */ @@ -5051,7 +5051,7 @@ int drbd_asender(struct drbd_thread *thi) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); ping_timeo = nc->ping_timeo; - no_cork = nc->no_cork; + tcp_cork = nc->tcp_cork; ping_int = nc->ping_int; rcu_read_unlock(); @@ -5066,14 +5066,14 @@ int drbd_asender(struct drbd_thread *thi) /* TODO: conditionally cork; it may hurt latency if we cork without much to send */ - if (!no_cork) + if (tcp_cork) drbd_tcp_cork(tconn->meta.socket); if (tconn_finish_peer_reqs(tconn)) { conn_err(tconn, "tconn_finish_peer_reqs() failed\n"); goto reconnect; } /* but unconditionally uncork unless disabled */ - if (!no_cork) + if (tcp_cork) drbd_tcp_uncork(tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e37c42d5dd6..78c3de49eff 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1694,7 +1694,7 @@ int drbd_worker(struct drbd_thread *thi) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - cork = nc ? !nc->no_cork : 0; + cork = nc ? 
nc->tcp_cork : 0; rcu_read_unlock(); if (tconn->data.socket && cork) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 53518fc2315..6632d10f1ee 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -162,7 +162,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field_def(28, GENLA_F_MANDATORY, no_cork, 0) + __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, 1) __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) From 81fa2e675ccf88035d9308a3c98fe2c2928ace00 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 4 May 2011 15:10:30 +0200 Subject: [PATCH 371/609] drbd: Refcounting for mdev objects Preparing removal of drbd_cfg_rwsem Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++- drivers/block/drbd/drbd_main.c | 27 +++++++++++++++++---------- drivers/block/drbd/drbd_nl.c | 8 ++++++-- drivers/block/drbd/drbd_receiver.c | 12 ++++++------ 4 files changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3f377e21a2f..8bc604e72d1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -867,6 +867,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_conf { struct drbd_tconn *tconn; int vnr; /* volume number within the connection */ + struct kref kref; /* things that are stored as / read from meta data on disk */ unsigned long flags; @@ -1373,7 +1374,7 @@ extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); -extern void 
drbd_delete_device(struct drbd_conf *mdev); +extern void drbd_minor_destroy(struct kref *kref); struct drbd_tconn *conn_create(const char *name); extern void conn_destroy(struct kref *kref); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 240319ce865..8da0e99ee7e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2052,7 +2052,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; @@ -2272,21 +2271,16 @@ static void drbd_release_all_peer_reqs(struct drbd_conf *mdev) } /* caution. no locking. */ -void drbd_delete_device(struct drbd_conf *mdev) +void drbd_minor_destroy(struct kref *kref) { + struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); struct drbd_tconn *tconn = mdev->tconn; - idr_remove(&mdev->tconn->volumes, mdev->vnr); - idr_remove(&minors, mdev_to_minor(mdev)); - synchronize_rcu(); - /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); /* end paranoia asserts */ - del_gendisk(mdev->vdisk); - /* cleanup stuff that may have been allocated during * device (re-)configuration or state changes */ @@ -2320,6 +2314,7 @@ static void drbd_cleanup(void) { unsigned int i; struct drbd_conf *mdev; + struct drbd_tconn *tconn, *tmp; unregister_reboot_notifier(&drbd_notifier); @@ -2337,8 +2332,19 @@ static void drbd_cleanup(void) drbd_genl_unregister(); down_write(&drbd_cfg_rwsem); - idr_for_each_entry(&minors, mdev, i) - drbd_delete_device(mdev); + idr_for_each_entry(&minors, mdev, i) { + idr_remove(&minors, mdev_to_minor(mdev)); + idr_remove(&mdev->tconn->volumes, mdev->vnr); + del_gendisk(mdev->vdisk); + synchronize_rcu(); + kref_put(&mdev->kref, &drbd_minor_destroy); + } + 
+ list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { + list_del(&tconn->all_tconn); + synchronize_rcu(); + kref_put(&tconn->kref, &conn_destroy); + } up_write(&drbd_cfg_rwsem); drbd_destroy_mempools(); @@ -2625,6 +2631,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, goto out_idr_remove_vol; } add_disk(disk); + kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */ /* inherit the connection state */ mdev->state.conn = tconn->cstate; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4a946a877bd..5747bc6a7c4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1065,7 +1065,7 @@ static void conn_reconfig_done(struct drbd_tconn *tconn) spin_unlock_irq(&tconn->req_lock); if (stop_threads) { /* asender is implicitly stopped by receiver - * in drbd_disconnect() */ + * in conn_disconnect() */ drbd_thread_stop(&tconn->receiver); drbd_thread_stop(&tconn->worker); } @@ -3033,7 +3033,11 @@ static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) * we may want to delete a minor from a live replication group. */ mdev->state.role == R_SECONDARY) { - drbd_delete_device(mdev); + idr_remove(&mdev->tconn->volumes, mdev->vnr); + idr_remove(&minors, mdev_to_minor(mdev)); + del_gendisk(mdev->vdisk); + synchronize_rcu(); + kref_put(&mdev->kref, &drbd_minor_destroy); return NO_ERROR; } else return ERR_MINOR_CONFIGURED; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b4858bb7894..7156e53b000 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -844,7 +844,7 @@ int drbd_connected(int vnr, void *p, void *data) * no point in trying again, please go standalone. * -2 We do not have a network config... 
*/ -static int drbd_connect(struct drbd_tconn *tconn) +static int conn_connect(struct drbd_tconn *tconn) { struct socket *sock, *msock; struct net_conf *nc; @@ -878,7 +878,7 @@ static int drbd_connect(struct drbd_tconn *tconn) tconn->meta.socket = s; send_first_packet(tconn, &tconn->meta, P_INITIAL_META); } else { - conn_err(tconn, "Logic error in drbd_connect()\n"); + conn_err(tconn, "Logic error in conn_connect()\n"); goto out_release_sockets; } } @@ -4240,7 +4240,7 @@ void conn_flush_workqueue(struct drbd_tconn *tconn) wait_for_completion(&barr.done); } -static void drbd_disconnect(struct drbd_tconn *tconn) +static void conn_disconnect(struct drbd_tconn *tconn) { enum drbd_conns oc; int rv = SS_UNKNOWN_ERROR; @@ -4636,9 +4636,9 @@ int drbdd_init(struct drbd_thread *thi) conn_info(tconn, "receiver (re)started\n"); do { - h = drbd_connect(tconn); + h = conn_connect(tconn); if (h == 0) { - drbd_disconnect(tconn); + conn_disconnect(tconn); schedule_timeout_interruptible(HZ); } if (h == -1) { @@ -4650,7 +4650,7 @@ int drbdd_init(struct drbd_thread *thi) if (h > 0) drbdd(tconn); - drbd_disconnect(tconn); + conn_disconnect(tconn); conn_info(tconn, "receiver terminated\n"); return 0; From ec0bddbc5574ea5903cec8f30ed57777f14d86a8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 4 May 2011 15:47:01 +0200 Subject: [PATCH 372/609] drbd: Use RCU for the drbd_tconns list Preparing removal of drbd_cfg_rwsem Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 7 ++++--- drivers/block/drbd/drbd_nl.c | 12 +++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8da0e99ee7e..771b53ece97 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2341,7 +2341,7 @@ static void drbd_cleanup(void) } list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { - list_del(&tconn->all_tconn); + 
list_del_rcu(&tconn->all_tconn); synchronize_rcu(); kref_put(&tconn->kref, &conn_destroy); } @@ -2409,7 +2409,7 @@ struct drbd_tconn *conn_get_by_name(const char *name) return NULL; down_read(&drbd_cfg_rwsem); - list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { if (!strcmp(tconn->name, name)) { kref_get(&tconn->kref); goto found; @@ -2459,6 +2459,7 @@ void conn_free_crypto(struct drbd_tconn *tconn) tconn->int_dig_vv = NULL; } +/* caller must be under genl_lock() */ struct drbd_tconn *conn_create(const char *name) { struct drbd_tconn *tconn; @@ -2503,7 +2504,7 @@ struct drbd_tconn *conn_create(const char *name) down_write(&drbd_cfg_rwsem); kref_init(&tconn->kref); - list_add_tail(&tconn->all_tconn, &drbd_tconns); + list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns); up_write(&drbd_cfg_rwsem); return tconn; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5747bc6a7c4..a0cf0005baf 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2674,7 +2674,7 @@ int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) /* synchronize with conn_create()/conn_destroy() */ down_read(&drbd_cfg_rwsem); /* revalidate iterator position */ - list_for_each_entry(tmp, &drbd_tconns, all_tconn) { + list_for_each_entry_rcu(tmp, &drbd_tconns, all_tconn) { if (pos == NULL) { /* first iteration */ pos = tmp; @@ -2692,8 +2692,8 @@ next_tconn: if (!mdev) { /* No more volumes to dump on this tconn. * Advance tconn iterator. */ - pos = list_entry(tconn->all_tconn.next, - struct drbd_tconn, all_tconn); + pos = list_entry_rcu(tconn->all_tconn.next, + struct drbd_tconn, all_tconn); /* Did we dump any volume on this tconn yet? 
*/ if (volume != 0) { /* If we reached the end of the list, @@ -3130,7 +3130,8 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* delete connection */ if (conn_lowest_minor(adm_ctx.tconn) < 0) { - list_del(&adm_ctx.tconn->all_tconn); + list_del_rcu(&adm_ctx.tconn->all_tconn); + synchronize_rcu(); kref_put(&adm_ctx.tconn->kref, &conn_destroy); retcode = NO_ERROR; @@ -3160,7 +3161,8 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) down_write(&drbd_cfg_rwsem); if (conn_lowest_minor(adm_ctx.tconn) < 0) { - list_del(&adm_ctx.tconn->all_tconn); + list_del_rcu(&adm_ctx.tconn->all_tconn); + synchronize_rcu(); kref_put(&adm_ctx.tconn->kref, &conn_destroy); retcode = NO_ERROR; From c141ebda031a0550d75634f7c94f7c85c2d5c9f5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 5 May 2011 16:13:10 +0200 Subject: [PATCH 373/609] drbd: Removing drbd_cfg_rwsem * Updates to all configuration items are done under genl_lock(), including removal of mdevs or tconns.
* All non-sleeping read sides are protected by RCU * All sleeping read sides keep reference counts to keep the objects alive Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 ++--- drivers/block/drbd/drbd_main.c | 20 ++++------ drivers/block/drbd/drbd_nl.c | 43 ++++++++------------- drivers/block/drbd/drbd_proc.c | 6 +-- drivers/block/drbd/drbd_receiver.c | 61 +++++++++++++++++++----------- drivers/block/drbd/drbd_worker.c | 8 +++- 6 files changed, 72 insertions(+), 75 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8bc604e72d1..56b190c6554 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -169,11 +169,8 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { #define DRBD_MD_MAGIC (DRBD_MAGIC+4) extern struct ratelimit_state drbd_ratelimit_state; -extern struct idr minors; -extern struct list_head drbd_tconns; -extern struct rw_semaphore drbd_cfg_rwsem; -/* drbd_cfg_rwsem protects: drbd_tconns list, minors idr, tconn->volumes idr - note: non sleeping iterations over the idrs are protoected by RCU */ +extern struct idr minors; /* RCU, updates: genl_lock() */ +extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */ /* on the wire */ enum drbd_packet { @@ -1477,7 +1474,7 @@ extern struct page *drbd_alloc_pages(struct drbd_conf *, unsigned int, bool); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); extern void conn_flush_workqueue(struct drbd_tconn *tconn); -extern int drbd_connected(int vnr, void *p, void *data); +extern int drbd_connected(struct drbd_conf *mdev); static inline void drbd_flush_workqueue(struct drbd_conf *mdev) { conn_flush_workqueue(mdev->tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 771b53ece97..22c2b4c881d 100644 --- 
a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -120,7 +120,6 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 */ struct idr minors; struct list_head drbd_tconns; /* list of struct drbd_tconn */ -DECLARE_RWSEM(drbd_cfg_rwsem); struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* peer requests */ @@ -2331,21 +2330,20 @@ static void drbd_cleanup(void) drbd_genl_unregister(); - down_write(&drbd_cfg_rwsem); idr_for_each_entry(&minors, mdev, i) { idr_remove(&minors, mdev_to_minor(mdev)); idr_remove(&mdev->tconn->volumes, mdev->vnr); del_gendisk(mdev->vdisk); - synchronize_rcu(); + /* synchronize_rcu(); No other threads running at this point */ kref_put(&mdev->kref, &drbd_minor_destroy); } + /* not _rcu since, no other updater anymore. Genl already unregistered */ list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { - list_del_rcu(&tconn->all_tconn); - synchronize_rcu(); + list_del(&tconn->all_tconn); /* not _rcu no proc, not other threads */ + /* synchronize_rcu(); */ kref_put(&tconn->kref, &conn_destroy); } - up_write(&drbd_cfg_rwsem); drbd_destroy_mempools(); unregister_blkdev(DRBD_MAJOR, "drbd"); @@ -2408,7 +2406,7 @@ struct drbd_tconn *conn_get_by_name(const char *name) if (!name || !name[0]) return NULL; - down_read(&drbd_cfg_rwsem); + rcu_read_lock(); list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { if (!strcmp(tconn->name, name)) { kref_get(&tconn->kref); @@ -2417,7 +2415,7 @@ struct drbd_tconn *conn_get_by_name(const char *name) } tconn = NULL; found: - up_read(&drbd_cfg_rwsem); + rcu_read_unlock(); return tconn; } @@ -2502,10 +2500,8 @@ struct drbd_tconn *conn_create(const char *name) drbd_set_res_opts_defaults(&tconn->res_opts); - down_write(&drbd_cfg_rwsem); kref_init(&tconn->kref); list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns); - up_write(&drbd_cfg_rwsem); return tconn; @@ -2637,7 +2633,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, 
unsigned int minor, /* inherit the connection state */ mdev->state.conn = tconn->cstate; if (mdev->state.conn == C_WF_REPORT_PARAMS) - drbd_connected(vnr, mdev, tconn); + drbd_connected(mdev); return NO_ERROR; @@ -2913,12 +2909,10 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - mutex_lock(&mdev->tconn->conf_update); /* This blocks wants to be get removed... */ bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents); if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; - mutex_unlock(&mdev->tconn->conf_update); err: mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a0cf0005baf..72ce3b0d0e0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -335,10 +335,15 @@ static void conn_md_sync(struct drbd_tconn *tconn) struct drbd_conf *mdev; int vnr; - down_read(&drbd_cfg_rwsem); - idr_for_each_entry(&tconn->volumes, mdev, vnr) + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); drbd_md_sync(mdev); - up_read(&drbd_cfg_rwsem); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); } int conn_khelper(struct drbd_tconn *tconn, char *cmd) @@ -1193,12 +1198,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(mdev->rs_plan_s, new_plan); } + mutex_unlock(&mdev->tconn->conf_update); drbd_md_sync(mdev); if (mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev); - mutex_unlock(&mdev->tconn->conf_update); synchronize_rcu(); kfree(old_disk_conf); kfree(old_plan); @@ -2013,7 +2018,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) new_my_addr = (struct sockaddr *)&new_conf->my_addr; new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - /* No need to take drbd_cfg_rwsem here. 
All reconfiguration is + /* No need for _rcu here. All reconfiguration is * strictly serialized on genl_lock(). We are protected against * concurrent reconfiguration/addition/deletion */ list_for_each_entry(oconn, &drbd_tconns, all_tconn) { @@ -2672,7 +2677,7 @@ int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) */ /* synchronize with conn_create()/conn_destroy() */ - down_read(&drbd_cfg_rwsem); + rcu_read_lock(); /* revalidate iterator position */ list_for_each_entry_rcu(tmp, &drbd_tconns, all_tconn) { if (pos == NULL) { @@ -2738,7 +2743,7 @@ next_tconn: } out: - up_read(&drbd_cfg_rwsem); + rcu_read_unlock(); /* where to start the next iteration */ cb->args[0] = (long)pos; cb->args[1] = (pos == tconn) ? volume + 1 : 0; @@ -3018,9 +3023,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) goto out; } - down_write(&drbd_cfg_rwsem); retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); - up_write(&drbd_cfg_rwsem); out: drbd_adm_finish(info, retcode); return 0; @@ -3053,9 +3056,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - down_write(&drbd_cfg_rwsem); retcode = adm_delete_minor(adm_ctx.mdev); - up_write(&drbd_cfg_rwsem); out: drbd_adm_finish(info, retcode); return 0; @@ -3078,52 +3079,43 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out; } - down_read(&drbd_cfg_rwsem); /* demote */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { retcode = drbd_set_role(mdev, R_SECONDARY, 0); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to demote"); - goto out_unlock; + goto out; } } - up_read(&drbd_cfg_rwsem); - /* disconnect; may stop the receiver; - * must not hold the drbd_cfg_rwsem */ retcode = conn_try_disconnect(adm_ctx.tconn, 0); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to disconnect"); goto out; } - down_read(&drbd_cfg_rwsem); /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { retcode = 
adm_detach(mdev); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to detach"); - goto out_unlock; + goto out; } } - up_read(&drbd_cfg_rwsem); /* If we reach this, all volumes (of this tconn) are Secondary, * Disconnected, Diskless, aka Unconfigured. Make sure all threads have - * actually stopped, state handling only does drbd_thread_stop_nowait(). - * This needs to be done without holding drbd_cfg_rwsem. */ + * actually stopped, state handling only does drbd_thread_stop_nowait(). */ drbd_thread_stop(&adm_ctx.tconn->worker); /* Now, nothing can fail anymore */ /* delete volumes */ - down_write(&drbd_cfg_rwsem); idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { retcode = adm_delete_minor(mdev); if (retcode != NO_ERROR) { /* "can not happen" */ drbd_msg_put_info("failed to delete volume"); - up_write(&drbd_cfg_rwsem); goto out; } } @@ -3140,10 +3132,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) retcode = ERR_CONN_IN_USE; drbd_msg_put_info("failed to delete connection"); } - up_write(&drbd_cfg_rwsem); goto out; -out_unlock: - up_read(&drbd_cfg_rwsem); out: drbd_adm_finish(info, retcode); return 0; @@ -3159,7 +3148,6 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - down_write(&drbd_cfg_rwsem); if (conn_lowest_minor(adm_ctx.tconn) < 0) { list_del_rcu(&adm_ctx.tconn->all_tconn); synchronize_rcu(); @@ -3169,7 +3157,6 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) } else { retcode = ERR_CONN_IN_USE; } - up_write(&drbd_cfg_rwsem); if (retcode == NO_ERROR) drbd_thread_stop(&adm_ctx.tconn->worker); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 792a71ec2e6..6b226cca1e8 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -229,7 +229,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) oos .. 
known out-of-sync kB */ - down_read(&drbd_cfg_rwsem); + rcu_read_lock(); idr_for_each_entry(&minors, mdev, i) { if (prev_i != i - 1) seq_printf(seq, "\n"); @@ -242,10 +242,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) mdev->state.role == R_SECONDARY) { seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { - rcu_read_lock(); nc = rcu_dereference(mdev->tconn->net_conf); wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; - rcu_read_unlock(); seq_printf(seq, "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n" " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " @@ -299,7 +297,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) } } } - up_read(&drbd_cfg_rwsem); + rcu_read_unlock(); return 0; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7156e53b000..aa42967398e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -63,7 +63,7 @@ enum finish_epoch { static int drbd_do_features(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_tconn *tconn); -static int drbd_disconnected(int vnr, void *p, void *data); +static int drbd_disconnected(struct drbd_conf *mdev); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); static int e_end_block(struct drbd_work *, int); @@ -811,9 +811,8 @@ static int drbd_socket_okay(struct socket **sock) } /* Gets called if a connection is established, or if a new minor gets created in a connection */ -int drbd_connected(int vnr, void *p, void *data) +int drbd_connected(struct drbd_conf *mdev) { - struct drbd_conf *mdev = (struct drbd_conf *)p; int err; atomic_set(&mdev->packet_seq, 0); @@ -847,8 +846,9 @@ int drbd_connected(int vnr, void *p, void *data) static int conn_connect(struct drbd_tconn *tconn) { struct socket *sock, *msock; + struct drbd_conf *mdev; struct net_conf *nc; - int timeout, try, h, ok; + int vnr, timeout, try, h, ok; if (conn_request_state(tconn, NS(conn, 
C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; @@ -1001,9 +1001,16 @@ retry: if (drbd_send_protocol(tconn) == -EOPNOTSUPP) return -1; - down_read(&drbd_cfg_rwsem); - h = !idr_for_each(&tconn->volumes, drbd_connected, tconn); - up_read(&drbd_cfg_rwsem); + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_connected(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); + return h; out_release_sockets: @@ -4242,8 +4249,9 @@ void conn_flush_workqueue(struct drbd_tconn *tconn) static void conn_disconnect(struct drbd_tconn *tconn) { + struct drbd_conf *mdev; enum drbd_conns oc; - int rv = SS_UNKNOWN_ERROR; + int vnr, rv = SS_UNKNOWN_ERROR; if (tconn->cstate == C_STANDALONE) return; @@ -4252,9 +4260,16 @@ static void conn_disconnect(struct drbd_tconn *tconn) drbd_thread_stop(&tconn->asender); drbd_free_sock(tconn); - down_read(&drbd_cfg_rwsem); - idr_for_each(&tconn->volumes, drbd_disconnected, tconn); - up_read(&drbd_cfg_rwsem); + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_disconnected(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); + conn_info(tconn, "Connection closed\n"); if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) @@ -4271,9 +4286,8 @@ static void conn_disconnect(struct drbd_tconn *tconn) conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); } -static int drbd_disconnected(int vnr, void *p, void *data) +static int drbd_disconnected(struct drbd_conf *mdev) { - struct drbd_conf *mdev = (struct drbd_conf *)p; enum drbd_fencing_p fp; unsigned int i; @@ -4974,30 +4988,33 @@ static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi) static int tconn_finish_peer_reqs(struct drbd_tconn *tconn) { struct drbd_conf *mdev; - int i, not_empty = 0; + int vnr, not_empty 
= 0; do { clear_bit(SIGNAL_ASENDER, &tconn->flags); flush_signals(current); - down_read(&drbd_cfg_rwsem); - idr_for_each_entry(&tconn->volumes, mdev, i) { + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); if (drbd_finish_peer_reqs(mdev)) { - up_read(&drbd_cfg_rwsem); - return 1; /* error */ + kref_put(&mdev->kref, &drbd_minor_destroy); + return 1; } + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); } - up_read(&drbd_cfg_rwsem); set_bit(SIGNAL_ASENDER, &tconn->flags); spin_lock_irq(&tconn->req_lock); - rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, i) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { not_empty = !list_empty(&mdev->done_ee); if (not_empty) break; } - rcu_read_unlock(); spin_unlock_irq(&tconn->req_lock); + rcu_read_unlock(); } while (not_empty); return 0; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 78c3de49eff..ec8f4245ef9 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1774,12 +1774,16 @@ int drbd_worker(struct drbd_thread *thi) */ spin_unlock_irq(&tconn->data.work.q_lock); - down_read(&drbd_cfg_rwsem); + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); + kref_get(&mdev->kref); + rcu_read_unlock(); drbd_mdev_cleanup(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); } - up_read(&drbd_cfg_rwsem); + rcu_read_unlock(); return 0; } From 6f9b5f84f5fb46def198b943e59c79aea0695c5c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 01:03:32 +0200 Subject: [PATCH 374/609] drbd: Make broadcast events return NO_ERROR Instead of returning a ret_code outside of the range of enum drbd_ret_code, use NO_ERROR to indicate success. This way, ret_code has the same meaning in all packets. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 72ce3b0d0e0..195428ee605 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3183,7 +3183,7 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) if (!d_out) /* cannot happen, but anyways. */ goto nla_put_failure; d_out->minor = mdev_to_minor(mdev); - d_out->ret_code = 0; + d_out->ret_code = NO_ERROR; if (nla_put_status_info(msg, mdev, sib)) goto nla_put_failure; From 7bac3e6f7e74993475a94487effe05dc1f68bdc7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 17:50:57 +0200 Subject: [PATCH 375/609] drbd: Also define the default values of boolean flags in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 16 ++++++++-------- include/linux/drbd_limits.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6632d10f1ee..02647dc8c67 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -122,10 +122,10 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) - __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) - __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) - __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, 1) + __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) ) 
GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -160,12 +160,12 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) + __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, 1) - __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) + __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) + __flg_field_def(31, GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index bcebb016fda..3d3e2d5125c 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,4 +170,14 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C +#define DRBD_DISK_BARRIER_DEF 0 +#define DRBD_DISK_FLUSHES_DEF 1 +#define DRBD_DISK_DRAIN_DEF 1 +#define DRBD_MD_FLUSHES_DEF 1 +#define DRBD_TCP_CORK_DEF 1 + +#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 +#define DRBD_ALWAYS_ASBP_DEF 0 +#define DRBD_USE_RLE_DEF 0 + #endif From 6139f60dc192e2c5478c1126d1aff7905dc0a98a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 20:00:02 +0200 Subject: [PATCH 376/609] drbd: Rename the want_lose field/flag to discard_my_data This is what it is called in config files and on the command line as well. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 14 +++++++------- include/linux/drbd_genl.h | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 56b190c6554..fa36757ffc4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -413,7 +413,7 @@ struct p_rs_param_95 { } __packed; enum drbd_conn_flags { - CF_WANT_LOSE = 1, + CF_DISCARD_MY_DATA = 1, CF_DRY_RUN = 2, }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 22c2b4c881d..86c8bc5ac60 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -943,8 +943,8 @@ int __drbd_send_protocol(struct drbd_tconn *tconn) p->after_sb_2p = cpu_to_be32(nc->after_sb_2p); p->two_primaries = cpu_to_be32(nc->two_primaries); cf = 0; - if (nc->want_lose) - cf |= CF_WANT_LOSE; + if (nc->discard_my_data) + cf |= CF_DISCARD_MY_DATA; if (nc->dry_run) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); @@ -988,7 +988,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) mdev->comm_bm_set = drbd_bm_total_weight(mdev); p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); rcu_read_lock(); - uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->want_lose ? 1 : 0; + uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0; rcu_read_unlock(); uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 
4 : 0; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 195428ee605..9a82306adf9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -606,7 +606,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) mutex_lock(&mdev->tconn->conf_update); nc = mdev->tconn->net_conf; if (nc) - nc->want_lose = 0; /* without copy; single bit op is atomic */ + nc->discard_my_data = 0; /* without copy; single bit op is atomic */ mutex_unlock(&mdev->tconn->conf_update); set_disk_ro(mdev->vdisk, false); @@ -1738,7 +1738,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) return ERR_STONITH_AND_PROT_A; } - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) + if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) return ERR_DISCARD; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index aa42967398e..e4e8f8a408d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2908,9 +2908,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (nc->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + if (nc->discard_my_data && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!nc->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + if (!nc->discard_my_data && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -3009,7 +3009,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_protocol *p = pi->data; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_want_lose, p_two_primaries, cf; + int p_discard_my_data, p_two_primaries, cf; struct net_conf *nc; p_proto = be32_to_cpu(p->protocol); @@ -3018,7 +3018,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) p_after_sb_2p = be32_to_cpu(p->after_sb_2p); p_two_primaries = 
be32_to_cpu(p->two_primaries); cf = be32_to_cpu(p->conn_flags); - p_want_lose = cf & CF_WANT_LOSE; + p_discard_my_data = cf & CF_DISCARD_MY_DATA; if (tconn->agreed_pro_version >= 87) { char integrity_alg[SHARED_SECRET_MAX]; @@ -3075,8 +3075,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect_rcu_unlock; } - if (p_want_lose && nc->want_lose) { - conn_err(tconn, "both sides have the 'want_lose' flag set\n"); + if (p_discard_my_data && nc->discard_my_data) { + conn_err(tconn, "both sides have the 'discard_my_data' flag set\n"); goto disconnect_rcu_unlock; } @@ -3806,7 +3806,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } mutex_lock(&mdev->tconn->conf_update); - mdev->tconn->net_conf->want_lose = 0; /* without copy; single bit op is atomic */ + mdev->tconn->net_conf->discard_my_data = 0; /* without copy; single bit op is atomic */ mutex_unlock(&mdev->tconn->conf_update); drbd_md_sync(mdev); /* update connected indicator, la_size, ... 
*/ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 02647dc8c67..6aece551d87 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -161,7 +161,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) + __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, discard_my_data) __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) From 7dc1d67f7c324e5bf23883b94508112ca7a70f62 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 3 May 2011 16:49:20 +0200 Subject: [PATCH 377/609] drbd: skip spurious wait_event in drbd_al_begin_io Activity log transaction writes are serialized on a bit lock. If several CPUs race to write an AL transaction, those that did not get the lock the first time may continue as soon as there are no more pending transactions. They do not need to all grab the lock in turn, just to realize that the AL is clean already, and they have nothing to do. This also closes a potential deadlock with drbd_adm_disk_opts. Once it got the AL bit lock, it knows there are no pending transactions, the AL is clean, and it should be safe to wait for all element references to drop to zero. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 933404e6ba2..aeb483daea0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -212,13 +212,22 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); unsigned enr; + bool locked = false; + D_ASSERT(atomic_read(&mdev->local_cnt) > 0); for (enr = first; enr <= last; enr++) wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL); - if (mdev->act_log->pending_changes) { + /* Serialize multiple transactions. + * This uses test_and_set_bit, memory barrier is implicit. + */ + wait_event(mdev->al_wait, + mdev->act_log->pending_changes == 0 || + (locked = lc_try_lock_for_transaction(mdev->act_log))); + + if (locked) { /* drbd_al_write_transaction(mdev,al_ext,enr); * recurses into generic_make_request(), which * disallows recursion, bios being serialized on the @@ -226,13 +235,6 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) * we have to delegate updates to the activity log * to the worker thread. */ - /* Serialize multiple transactions. - * This uses test_and_set_bit, memory barrier is implicit. - * Optimization potential: - * first check for transaction number > old transaction number, - * so not all waiters have to lock/unlock. */ - wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log)); - /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. 
*/ if (mdev->act_log->pending_changes) { From 6394b9358e6187414b7a6de7ba2c681ee4a790ac Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:29:52 +0200 Subject: [PATCH 378/609] drbd: Refer to resync-rate consistently throughout the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fa36757ffc4..8026adacd3d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -389,21 +389,21 @@ struct p_barrier_ack { } __packed; struct p_rs_param { - u32 rate; + u32 resync_rate; /* Since protocol version 88 and higher. */ char verify_alg[0]; } __packed; struct p_rs_param_89 { - u32 rate; + u32 resync_rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; } __packed; struct p_rs_param_95 { - u32 rate; + u32 resync_rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; u32 c_plan_ahead; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 86c8bc5ac60..26d7763d525 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -888,14 +888,14 @@ int drbd_send_sync_param(struct drbd_conf *mdev) if (get_ldev(mdev)) { dc = rcu_dereference(mdev->ldev->disk_conf); - p->rate = cpu_to_be32(dc->resync_rate); + p->resync_rate = cpu_to_be32(dc->resync_rate); p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); p->c_delay_target = cpu_to_be32(dc->c_delay_target); p->c_fill_target = cpu_to_be32(dc->c_fill_target); p->c_max_rate = cpu_to_be32(dc->c_max_rate); put_ldev(mdev); } else { - p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF); p->c_plan_ahead 
= cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e4e8f8a408d..684f7954272 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3221,7 +3221,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) old_disk_conf = mdev->ldev->disk_conf; *new_disk_conf = *old_disk_conf; - new_disk_conf->resync_rate = be32_to_cpu(p->rate); + new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); } if (apv >= 88) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6aece551d87..778708d9293 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -113,7 +113,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RATE_DEF) + __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3d3e2d5125c..48339ae69d5 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -98,10 +98,11 @@ /* syncer { */ /* FIXME allow rate to be zero? */ -#define DRBD_RATE_MIN 1 +#define DRBD_RESYNC_RATE_MIN 1 /* channel bonding 10 GbE, or other hardware */ -#define DRBD_RATE_MAX (4 << 20) -#define DRBD_RATE_DEF 250 /* kb/second */ +#define DRBD_RESYNC_RATE_MAX (4 << 20) +#define DRBD_RESYNC_RATE_DEF 250 +#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ /* less than 7 would hit performance unnecessarily. 
* 919 slots context information per transaction, From 69ef82dea4c34e4a0541fc3f415b0fef70fe12b0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:34:35 +0200 Subject: [PATCH 379/609] drbd: Refer to connect-int consistently throughout the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 12 ++++++------ include/linux/drbd_genl.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 684f7954272..7deade196a3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -617,7 +617,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) struct sockaddr_in6 peer_in6; struct net_conf *nc; int err, peer_addr_len, my_addr_len; - int sndbuf_size, rcvbuf_size, try_connect_int; + int sndbuf_size, rcvbuf_size, connect_int; int disconnect_on_error = 1; rcu_read_lock(); @@ -629,7 +629,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; - try_connect_int = nc->try_connect_int; + connect_int = nc->connect_int; my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6)); memcpy(&src_in6, nc->my_addr, my_addr_len); @@ -653,7 +653,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = try_connect_int * HZ; + sock->sk->sk_sndtimeo = connect_int * HZ; drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); /* explicitly bind to the configured IP as source IP @@ -702,7 +702,7 @@ out: static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) { int timeo, err, my_addr_len; - int sndbuf_size, rcvbuf_size, try_connect_int; + int sndbuf_size, rcvbuf_size, connect_int; struct socket *s_estab = NULL, *s_listen; struct sockaddr_in6 my_addr; struct net_conf *nc; @@ -717,7 +717,7 @@ static struct socket *drbd_wait_for_connect(struct 
drbd_tconn *tconn) sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; - try_connect_int = nc->try_connect_int; + connect_int = nc->connect_int; my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6)); memcpy(&my_addr, nc->my_addr, my_addr_len); @@ -731,7 +731,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) goto out; } - timeo = try_connect_int * HZ; + timeo = connect_int * HZ; timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 778708d9293..67c816c0fc2 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -143,7 +143,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, try_connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) From 95f8efd08bcce65df994049a292b94e56c7ada67 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 May 2011 11:15:34 +0200 Subject: [PATCH 380/609] drbd: Fix the upper limit of resync-after The 32-bit resync_after netlink field takes a device minor number as parameter, which is no longer limited to 255. We cannot statically verify which device numbers are valid, so set the upper limit to the highest possible signed 32-bit integer. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 26 +++++++++++++------------- include/linux/drbd.h | 4 ++-- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 6 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8026adacd3d..e1672284076 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1408,8 +1408,8 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor); -void drbd_sync_after_changed(struct drbd_conf *mdev); +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); +void drbd_resync_after_changed(struct drbd_conf *mdev); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9a82306adf9..74c27f1507f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1183,10 +1183,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } write_lock_irq(&global_state_lock); - retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after); + retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); if (retcode == NO_ERROR) { rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - drbd_sync_after_changed(mdev); + drbd_resync_after_changed(mdev); } write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ec8f4245ef9..6410c55831e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ 
-57,7 +57,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel); /* About the global_state_lock Each state transition on an device holds a read lock. In case we have - to evaluate the sync after dependencies, we grab a write lock, because + to evaluate the resync after dependencies, we grab a write lock, because we need stable states on all devices for that. */ rwlock_t global_state_lock; @@ -1340,17 +1340,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) static int _drbd_may_sync_now(struct drbd_conf *mdev) { struct drbd_conf *odev = mdev; - int ra; + int resync_after; while (1) { if (!odev->ldev) return 1; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); - if (ra == -1) + if (resync_after == -1) return 1; - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1426,36 +1426,36 @@ void suspend_other_sg(struct drbd_conf *mdev) } /* caller must hold global_state_lock */ -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor) +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) { struct drbd_conf *odev; - int ra; + int resync_after; if (o_minor == -1) return NO_ERROR; if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) - return ERR_SYNC_AFTER; + return ERR_RESYNC_AFTER; /* check for loops */ odev = minor_to_mdev(o_minor); while (1) { if (odev == mdev) - return ERR_SYNC_AFTER_CYCLE; + return ERR_RESYNC_AFTER_CYCLE; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); /* dependency chain ends here, no cycles. 
*/ - if (ra == -1) + if (resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); } } /* caller must hold global_state_lock */ -void drbd_sync_after_changed(struct drbd_conf *mdev) +void drbd_resync_after_changed(struct drbd_conf *mdev) { int changes; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 6c7c85d8fc4..05063e6db81 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -130,8 +130,8 @@ enum drbd_ret_code { ERR_INTR = 129, /* EINTR */ ERR_RESIZE_RESYNC = 130, ERR_NO_PRIMARY = 131, - ERR_SYNC_AFTER = 132, - ERR_SYNC_AFTER_CYCLE = 133, + ERR_RESYNC_AFTER = 132, + ERR_RESYNC_AFTER_CYCLE = 133, ERR_PAUSE_IS_SET = 134, ERR_PAUSE_IS_CLEAR = 135, ERR_PACKET_NR = 137, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 67c816c0fc2..a59466f7f66 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 48339ae69d5..c4a8f0fef7b 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,9 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_AFTER_MIN -1 -#define DRBD_AFTER_MAX 255 -#define DRBD_AFTER_DEF -1 +#define DRBD_RESYNC_AFTER_MIN -1 +#define DRBD_RESYNC_AFTER_MAX (1<<30) +#define DRBD_RESYNC_AFTER_DEF -1 +#define 
DRBD_RESYNC_AFTER_SCALE '1' /* } */ From 3a45abd577727d2268e190d372600f8652883453 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 May 2011 12:02:54 +0200 Subject: [PATCH 381/609] drbd: Convert resync-after into a signed netlink field Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 3 +++ include/linux/genl_magic_struct.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index a59466f7f66..7b174a093a8 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) + __s32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index e908f1c5035..94e839aafae 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -437,6 +437,9 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __u32_field_def #define __u32_field_def(attr_nr, attr_flag, name, default) \ x->name = default; +#undef __s32_field_def +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; #undef __flg_field_def #define __flg_field_def(attr_nr, attr_flag, name, default) \ x->name = default; diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index b1ddbb5bd72..0fca21fd1af 100644 --- a/include/linux/genl_magic_struct.h +++ 
b/include/linux/genl_magic_struct.h @@ -115,6 +115,8 @@ enum { __flg_field(attr_nr, attr_flag, name) #define __u32_field_def(attr_nr, attr_flag, name, default) \ __u32_field(attr_nr, attr_flag, name) +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + __s32_field(attr_nr, attr_flag, name) #define __str_field_def(attr_nr, attr_flag, name, maxlen) \ __str_field(attr_nr, attr_flag, name, maxlen) From c5482bbd9607bf38cbc952eacaa429e6ba3160a0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:44:55 +0200 Subject: [PATCH 382/609] drbd: Rename DISK_SIZE_SECT -> DISK_SIZE We don't have the units in constant names in other places, either. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index c4a8f0fef7b..8f8bbea545e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,9 +125,10 @@ * the upper limit with 64bit kernel, enough ram and flexible meta data * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ -#define DRBD_DISK_SIZE_SECT_MIN 0 -#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) -#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_MIN 0 +#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_SCALE 's' /* sectors */ #define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE From dcb20d1a8e7d9602e52a9b673ae4d7f746d2cbb2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 16 May 2011 14:30:24 +0200 Subject: [PATCH 383/609] drbd: Refuse to change network options online when... 
* the peer does not speak protocol_version 100 and the user wants to change one of: - wire_protocol - two_primaries - integrity_alg * the user wants to remove the allow_two_primaries flag when there are two primaries Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 22 ++++++++++++++++++---- include/linux/drbd.h | 1 + 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 74c27f1507f..133a6724657 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1722,10 +1722,24 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n struct drbd_conf *mdev; int i; - if (old_conf && tconn->agreed_pro_version < 100 && - tconn->cstate == C_WF_REPORT_PARAMS && - new_conf->wire_protocol != old_conf->wire_protocol) - return ERR_NEED_APV_100; + if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) { + if (new_conf->wire_protocol != old_conf->wire_protocol) + return ERR_NEED_APV_100; + + if (new_conf->two_primaries != old_conf->two_primaries) + return ERR_NEED_APV_100; + + if (!new_conf->integrity_alg != !old_conf->integrity_alg) + return ERR_NEED_APV_100; + + if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg)) + return ERR_NEED_APV_100; + } + + if (!new_conf->two_primaries && + conn_highest_role(tconn) == R_PRIMARY && + conn_highest_peer(tconn) == R_PRIMARY) + return ERR_NEED_ALLOW_TWO_PRI; if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 05063e6db81..679e8112322 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -161,6 +161,7 @@ enum drbd_ret_code { ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, ERR_NEED_APV_100 = 163, + ERR_NEED_ALLOW_TWO_PRI = 164, /* insert new ones above this line */ AFTER_LAST_ERR_CODE From 46e1ce4177aac86b8e39bc7bb5763e9d040ae5cb Mon Sep 17 
00:00:00 2001 From: Philipp Reisner Date: Mon, 16 May 2011 12:57:15 +0200 Subject: [PATCH 384/609] drbd: protect updates to integrity_tfm by tconn->data->mutex Since we need to hold that mutex anyways to make sure the peer gets that change in the right position in the data stream, it makes a lot of sense to use the same mutex to ensure existence of the tfm. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e1672284076..c59012f9685 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -848,7 +848,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct list_head out_of_sequence_requests; struct crypto_hash *cram_hmac_tfm; - struct crypto_hash *integrity_tfm; /* checksums we compute */ + struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ struct crypto_hash *peer_integrity_tfm; /* checksums we verify */ struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 26d7763d525..bca599a3c1e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1720,11 +1720,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) int dgs; int err; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ?
crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); if (!p) return -EIO; p->sector = cpu_to_be64(req->i.sector); @@ -1793,11 +1793,12 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, int err; int dgs; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); if (!p) return -EIO; p->sector = cpu_to_be64(peer_req->i.sector); From f179d76d76ce6653ba4c6a6b0c2e7c42215734bc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 16 May 2011 17:31:47 +0200 Subject: [PATCH 385/609] drbd: Made cmp_after_sb() more generic into convert_after_sb() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7deade196a3..b5eeb8de407 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2984,25 +2984,18 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol return rv; } -/* returns 1 if invalid */ -static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) +static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer) { /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ - if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) || - (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL)) - return 0; + if (peer == ASB_DISCARD_REMOTE) + return ASB_DISCARD_LOCAL; /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ - if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL || - self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL) - 
return 1; + if (peer == ASB_DISCARD_LOCAL) + return ASB_DISCARD_REMOTE; /* everything else is valid if they are equal on both sides. */ - if (peer == self) - return 0; - - /* everything es is invalid. */ - return 1; + return peer; } static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) @@ -3060,17 +3053,17 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect_rcu_unlock; } - if (cmp_after_sb(p_after_sb_0p, nc->after_sb_0p)) { + if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { conn_err(tconn, "incompatible after-sb-0pri settings\n"); goto disconnect_rcu_unlock; } - if (cmp_after_sb(p_after_sb_1p, nc->after_sb_1p)) { + if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { conn_err(tconn, "incompatible after-sb-1pri settings\n"); goto disconnect_rcu_unlock; } - if (cmp_after_sb(p_after_sb_2p, nc->after_sb_2p)) { + if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { conn_err(tconn, "incompatible after-sb-2pri settings\n"); goto disconnect_rcu_unlock; } From 7aca6c754976f2813a3fcc2f5068b8fe09eb219c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 17 May 2011 10:12:56 +0200 Subject: [PATCH 386/609] drbd: Allocation of int_dig_in and int_dig_vv was missing Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b5eeb8de407..61b57fc4867 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3004,6 +3004,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_discard_my_data, p_two_primaries, cf; struct net_conf *nc; + void *int_dig_in = NULL, *int_dig_vv = NULL; p_proto = be32_to_cpu(p->protocol); p_after_sb_0p = be32_to_cpu(p->after_sb_0p); @@ -3026,6 +3027,8 @@ 
static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) integrity_alg[SHARED_SECRET_MAX-1] = 0; if (integrity_alg[0]) { + int hash_size; + tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); if (!tfm) { conn_err(tconn, "peer data-integrity-alg %s not supported\n", @@ -3033,11 +3036,23 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect; } conn_info(tconn, "peer data-integrity-alg: %s\n", integrity_alg); + + hash_size = crypto_hash_digestsize(tfm); + int_dig_in = kmalloc(hash_size, GFP_KERNEL); + int_dig_vv = kmalloc(hash_size, GFP_KERNEL); + if (!(int_dig_in && int_dig_vv)) { + crypto_free_hash(tfm); + goto disconnect; + } } if (tconn->peer_integrity_tfm) crypto_free_hash(tconn->peer_integrity_tfm); tconn->peer_integrity_tfm = tfm; + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + tconn->int_dig_in = int_dig_in; + tconn->int_dig_vv = int_dig_vv; } clear_bit(CONN_DRY_RUN, &tconn->flags); From 036b17eaab935541835f4cc1fcbfe62ead5faa55 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 16 May 2011 17:38:11 +0200 Subject: [PATCH 387/609] drbd: Receiving part for the PROTOCOL_UPDATE packet Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 +- drivers/block/drbd/drbd_main.c | 2 + drivers/block/drbd/drbd_receiver.c | 73 ++++++++++++++++++++++++------ 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c59012f9685..75b26d820d7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -225,6 +225,7 @@ enum drbd_packet { P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ + P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ P_MAY_IGNORE = 
0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAX_OPT_CMD = 0x101, @@ -849,7 +850,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ - struct crypto_hash *peer_integrity_tfm; /* checksums we verify */ + struct crypto_hash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */ struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; void *int_dig_in; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bca599a3c1e..4133335e6e4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3290,6 +3290,8 @@ const char *cmdname(enum drbd_packet cmd) [P_RS_CANCEL] = "RSCancel", [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", + [P_RETRY_WRITE] = "retry_write", + [P_PROTOCOL_UPDATE] = "protocol_update", /* enum drbd_packet, but not commands - obsoleted flags: * P_MAY_IGNORE diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 61b57fc4867..96113032b03 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3001,9 +3001,11 @@ static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer) static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_protocol *p = pi->data; - int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_discard_my_data, p_two_primaries, cf; - struct net_conf *nc; + enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; + int p_proto, p_discard_my_data, p_two_primaries, cf; + struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; + char integrity_alg[SHARED_SECRET_MAX] = ""; + struct crypto_hash *peer_tfm = NULL, *tfm = NULL; void *int_dig_in = NULL, *int_dig_vv = NULL; p_proto = be32_to_cpu(p->protocol); @@ -3015,8 +3017,6 @@ static 
int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) p_discard_my_data = cf & CF_DISCARD_MY_DATA; if (tconn->agreed_pro_version >= 87) { - char integrity_alg[SHARED_SECRET_MAX]; - struct crypto_hash *tfm = NULL; int err; if (pi->size > sizeof(integrity_alg)) @@ -3024,35 +3024,70 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) err = drbd_recv_all(tconn, integrity_alg, pi->size); if (err) return err; - integrity_alg[SHARED_SECRET_MAX-1] = 0; + integrity_alg[SHARED_SECRET_MAX - 1] = 0; + } + if (pi->cmd == P_PROTOCOL_UPDATE) { if (integrity_alg[0]) { int hash_size; - tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (!tfm) { + peer_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + if (!(peer_tfm && tfm)) { conn_err(tconn, "peer data-integrity-alg %s not supported\n", integrity_alg); goto disconnect; } - conn_info(tconn, "peer data-integrity-alg: %s\n", integrity_alg); hash_size = crypto_hash_digestsize(tfm); int_dig_in = kmalloc(hash_size, GFP_KERNEL); int_dig_vv = kmalloc(hash_size, GFP_KERNEL); if (!(int_dig_in && int_dig_vv)) { - crypto_free_hash(tfm); + conn_err(tconn, "Allocation of buffers for data integrity checking failed\n"); goto disconnect; } } - if (tconn->peer_integrity_tfm) - crypto_free_hash(tconn->peer_integrity_tfm); - tconn->peer_integrity_tfm = tfm; + new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_net_conf) { + conn_err(tconn, "Allocation of new net_conf failed\n"); + goto disconnect; + } + + mutex_lock(&tconn->data.mutex); + mutex_lock(&tconn->conf_update); + old_net_conf = tconn->net_conf; + *new_net_conf = *old_net_conf; + + new_net_conf->wire_protocol = p_proto; + new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p); + new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p); + new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p); + new_net_conf->two_primaries = 
p_two_primaries; + strcpy(new_net_conf->integrity_alg, integrity_alg); + new_net_conf->integrity_alg_len = strlen(integrity_alg) + 1; + + crypto_free_hash(tconn->integrity_tfm); + tconn->integrity_tfm = tfm; + + rcu_assign_pointer(tconn->net_conf, new_net_conf); + mutex_unlock(&tconn->conf_update); + mutex_unlock(&tconn->data.mutex); + + crypto_free_hash(tconn->peer_integrity_tfm); kfree(tconn->int_dig_in); kfree(tconn->int_dig_vv); + tconn->peer_integrity_tfm = peer_tfm; tconn->int_dig_in = int_dig_in; tconn->int_dig_vv = int_dig_vv; + + if (strcmp(old_net_conf->integrity_alg, integrity_alg)) + conn_info(tconn, "peer data-integrity-alg: %s\n", integrity_alg); + + synchronize_rcu(); + kfree(old_net_conf); + + return 0; } clear_bit(CONN_DRY_RUN, &tconn->flags); @@ -3063,7 +3098,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - if (p_proto != nc->wire_protocol && tconn->agreed_pro_version < 100) { + if (p_proto != nc->wire_protocol) { conn_err(tconn, "incompatible communication protocols\n"); goto disconnect_rcu_unlock; } @@ -3093,6 +3128,11 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect_rcu_unlock; } + if (strcmp(integrity_alg, nc->integrity_alg)) { + conn_err(tconn, "incompatible setting of the data-integrity-alg\n"); + goto disconnect_rcu_unlock; + } + rcu_read_unlock(); return 0; @@ -3100,6 +3140,10 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) disconnect_rcu_unlock: rcu_read_unlock(); disconnect: + crypto_free_hash(peer_tfm); + crypto_free_hash(tfm); + kfree(int_dig_in); + kfree(int_dig_vv); conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } @@ -4197,6 +4241,7 @@ static struct data_cmd drbd_cmd_handler[] = { [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, 
[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, + [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, }; static void drbdd(struct drbd_tconn *tconn) From d659f2aaea1bd2fe9cf68b33d881470678c6ce2b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 16 May 2011 17:38:45 +0200 Subject: [PATCH 388/609] drbd: Send PROTOCOL_UPDATE packets when appropriate Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_nl.c | 11 ++--------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 75b26d820d7..c3019730a24 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1051,7 +1051,7 @@ extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t, unsigned); -extern int __drbd_send_protocol(struct drbd_tconn *tconn); +extern int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd); extern int drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4133335e6e4..dab0a75c63b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -911,7 +911,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev) return drbd_send_command(mdev, sock, cmd, size, NULL, 0); } -int __drbd_send_protocol(struct drbd_tconn *tconn) +int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) { struct drbd_socket *sock; struct p_protocol *p; @@ -953,7 +953,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn) strcpy(p->integrity_alg, nc->integrity_alg); rcu_read_unlock(); - return 
__conn_send_command(tconn, sock, P_PROTOCOL, size, NULL, 0); + return __conn_send_command(tconn, sock, cmd, size, NULL, 0); } int drbd_send_protocol(struct drbd_tconn *tconn) @@ -961,7 +961,7 @@ int drbd_send_protocol(struct drbd_tconn *tconn) int err; mutex_lock(&tconn->data.mutex); - err = __drbd_send_protocol(tconn); + err = __drbd_send_protocol(tconn, P_PROTOCOL); mutex_unlock(&tconn->data.mutex); return err; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 133a6724657..b81f924c47b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1866,7 +1866,6 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int ovr; /* online verify running */ int rsr; /* re-sync running */ struct crypto crypto = { }; - bool change_integrity_alg; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); if (!adm_ctx.reply_skb) @@ -1923,9 +1922,6 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) goto fail; } - change_integrity_alg = strcmp(old_conf->integrity_alg, - new_conf->integrity_alg); - retcode = alloc_crypto(&crypto, new_conf); if (retcode != NO_ERROR) goto fail; @@ -1949,13 +1945,10 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) tconn->int_dig_vv = crypto.int_dig_vv; crypto_free_hash(tconn->integrity_tfm); tconn->integrity_tfm = crypto.integrity_tfm; - if (change_integrity_alg) { + if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100) /* Do this without trying to take tconn->data.mutex again. 
*/ - if (__drbd_send_protocol(tconn)) - goto fail; - } + __drbd_send_protocol(tconn, P_PROTOCOL_UPDATE); - /* FIXME Changing cram_hmac while the connection is established is useless */ crypto_free_hash(tconn->cram_hmac_tfm); tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; From 309f0b70ab789bf85c5f5f32dbc466d42f024747 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 May 2011 01:24:14 +0200 Subject: [PATCH 389/609] drbd: Use more generic constant names These constants are useful for the same purpose in more than one place. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 8f8bbea545e..3627f760966 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,10 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_RESYNC_AFTER_MIN -1 -#define DRBD_RESYNC_AFTER_MAX (1<<30) -#define DRBD_RESYNC_AFTER_DEF -1 -#define DRBD_RESYNC_AFTER_SCALE '1' +#define DRBD_MINOR_NUMBER_MIN -1 +#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_DEF -1 +#define DRBD_MINOR_NUMBER_SCALE '1' /* } */ From 509100e6012db92f4af3796436b450447c6c8268 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 17 May 2011 13:29:46 +0200 Subject: [PATCH 390/609] drbd: Output signed / unsigned netlink fields correctly Note: All input values are still treated as signed; unsigned long long values are still broken. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_func.h | 23 +++++++++----- include/linux/genl_magic_struct.h | 52 ++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 94e839aafae..2ae16126c6a 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -78,12 +78,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ { s_fields }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ + __put, __is_signed) \ [__nla_type(attr_nr)] = { .type = nla_type }, #undef __array #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ - __get, __put) \ + __get, __put, __is_signed) \ [__nla_type(attr_nr)] = { .type = nla_type, \ .len = maxlen - (nla_type == NLA_NUL_STRING) }, @@ -241,7 +242,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ } #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name = __get(nla); \ @@ -249,7 +251,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ /* validate_nla() already checked nla_len <= maxlen appropriately. 
*/ #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name ## _len = \ @@ -410,14 +413,16 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ __put(skb, attr_nr, s->name); \ } #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ __put(skb, attr_nr, min_t(int, maxlen, \ @@ -431,9 +436,11 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ /* Functions for initializing structs to default values. 
*/ #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) #undef __u32_field_def #define __u32_field_def(attr_nr, attr_flag, name, default) \ x->name = default; diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 0fca21fd1af..ba911da84d9 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -87,28 +87,28 @@ enum { /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, char, \ - nla_get_u8, NLA_PUT_U8) + nla_get_u8, NLA_PUT_U8, false) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ - nla_get_u8, NLA_PUT_U8) + nla_get_u8, NLA_PUT_U8, false) #define __u16_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ - nla_get_u16, NLA_PUT_U16) + nla_get_u16, NLA_PUT_U16, false) #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ - nla_get_u32, NLA_PUT_U32) + nla_get_u32, NLA_PUT_U32, false) #define __s32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ - nla_get_u32, NLA_PUT_U32) + nla_get_u32, NLA_PUT_U32, true) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ - nla_get_u64, NLA_PUT_U64) + nla_get_u64, NLA_PUT_U64, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, NLA_PUT) + nla_strlcpy, NLA_PUT, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, 
\ - nla_memcpy, NLA_PUT) + nla_memcpy, NLA_PUT, false) /* fields with default values */ #define __flg_field_def(attr_nr, attr_flag, name, default) \ @@ -174,11 +174,13 @@ enum { \ }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, \ + __get, __put, __is_signed) \ T_ ## name = (__u16)(attr_nr | attr_flag), #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, \ + maxlen, __get, __put, __is_signed) \ T_ ## name = (__u16)(attr_nr | attr_flag), #include GENL_MAGIC_INCLUDE_FILE @@ -238,11 +240,13 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ } #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ case attr_nr: #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ case attr_nr: #include GENL_MAGIC_INCLUDE_FILE @@ -260,16 +264,36 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ struct s_name { s_fields }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ type name; #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ type name[maxlen]; \ __u32 name ## _len; #include GENL_MAGIC_INCLUDE_FILE +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, 
__put, \ + is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#include GENL_MAGIC_INCLUDE_FILE + /* }}}1 */ #endif /* GENL_MAGIC_STRUCT_H */ /* vim: set foldmethod=marker nofoldenable : */ From bbbef2d5ad8203f67274196dac90754ae106a463 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 18 May 2011 16:48:16 +0200 Subject: [PATCH 391/609] drbd: Remove unused GENLA_F_MAY_IGNORE flag Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_struct.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index ba911da84d9..f3c3425ac30 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -40,11 +40,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * yet implemented features, if newer userland tries to use them even though * the genl_family version clearly indicates they are not available. * - * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. - * To be used for API extensions for things that have sane defaults, - * so newer userland can still talk to older kernel, knowing it will - * silently ignore these attributes if not yet known. - * * NOTE: These flags overload * NLA_F_NESTED (1 << 15) * NLA_F_NET_BYTEORDER (1 << 14) @@ -55,7 +50,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * See also: nla_type() */ enum { - GENLA_F_MAY_IGNORE = 0, GENLA_F_MANDATORY = 1 << 14, GENLA_F_REQUIRED = 1 << 15, From 5f9359201b5cf1d94fe0e0c47fcba38cfc921863 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 19 May 2011 17:39:28 +0200 Subject: [PATCH 392/609] drbd: Make drbd's use of netlink attribute flags less confusing Make it more clear in the flag names which flags are internal to drbd, and which are not. 
The check for mandatory attributes is the only extension visible at the netlink layer. Attributes with this flag set would look like unknown attributes to some kernel versions. The netlink layer would ignore them and also skip consistency checks on the attribute type and length. To avoid this, we check for mandatory attributes first, remove the mandatory flag, and then process the attributes normally. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 238 +++++++++++++++--------------- include/linux/genl_magic_func.h | 107 ++++++-------- include/linux/genl_magic_struct.h | 68 ++++----- 3 files changed, 192 insertions(+), 221 deletions(-) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 7b174a093a8..4ceecb9307d 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -86,7 +86,7 @@ */ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, /* "arbitrary" size strings, nla_policy.len = 0 */ - __str_field(1, GENLA_F_MANDATORY, info_text, 0) + __str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0) ) /* Configuration requests typically need a context to operate on. @@ -95,133 +95,133 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource.
*/ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - __u32_field(1, GENLA_F_MANDATORY, ctx_volume) - __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) + __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_conn_name, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, - __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) - __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) - __s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + __str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128) + __str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128) + __s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx) /* use the resize command to try and change the disk_size */ - __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) + __u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size) /* we could change the max_bio_bvecs, * but it won't propagate through the stack */ - __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) + __u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs) - __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) - __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __s32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) - __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) - __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) - __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) - __u32_field_def(13, GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) - __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, 
DRBD_C_MAX_RATE_DEF) - __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) + __s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) - __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) - __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) - __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, - __str_field_def(1, GENLA_F_MANDATORY, cpu_mask, 32) - __u32_field_def(2, GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) + __str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) - __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) - __str_field_def(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __bin_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, my_addr, 
128) + __bin_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_addr, 128) + __str_field_def(3, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field_def(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field_def(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) - __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) - __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) - __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) - __u32_field_def(13, GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) - __u32_field_def(14, GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) - __u32_field_def(15, GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) - __u32_field_def(16, GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) - __u32_field_def(17, GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) - __u32_field_def(18, GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) - __u32_field_def(19, GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) - __u32_field_def(20, GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) - __u32_field_def(21, GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) - __u32_field_def(22, GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) - __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) - __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) - __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, discard_my_data) - 
__flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) - __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field_def(31, GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(6, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(7, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(24, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(25, 
DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(28, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field_def(31, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, - __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) + __flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate) ) GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, - __u64_field(1, GENLA_F_MANDATORY, resize_size) - __flg_field(2, GENLA_F_MANDATORY, resize_force) - __flg_field(3, GENLA_F_MANDATORY, no_resync) + __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) + __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) + __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) ) GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, /* the reason of the broadcast, * if this is an event triggered broadcast. */ - __u32_field(1, GENLA_F_MANDATORY, sib_reason) - __u32_field(2, GENLA_F_REQUIRED, current_state) - __u64_field(3, GENLA_F_MANDATORY, capacity) - __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + __u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason) + __u32_field(2, DRBD_F_REQUIRED, current_state) + __u64_field(3, DRBD_GENLA_F_MANDATORY, capacity) + __u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid) /* These are for broadcast from after state change work. * prev_state and new_state are from the moment the state change took * place, new_state is not neccessarily the same as current_state, * there may have been more state changes since. Which will be * broadcasted soon, in their respective after state change work. 
*/ - __u32_field(5, GENLA_F_MANDATORY, prev_state) - __u32_field(6, GENLA_F_MANDATORY, new_state) + __u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state) + __u32_field(6, DRBD_GENLA_F_MANDATORY, new_state) /* if we have a local disk: */ - __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) - __u32_field(8, GENLA_F_MANDATORY, disk_flags) - __u64_field(9, GENLA_F_MANDATORY, bits_total) - __u64_field(10, GENLA_F_MANDATORY, bits_oos) + __bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags) + __u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total) + __u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos) /* and in case resync or online verify is active */ - __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) - __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + __u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed) /* for pre and post notifications of helper execution */ - __str_field(13, GENLA_F_MANDATORY, helper, 32) - __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) + __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) + __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) ) GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, - __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) + __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) ) GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, - __flg_field(1, GENLA_F_MANDATORY, clear_bm) + __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) ) GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, - __u32_field(1, GENLA_F_REQUIRED, timeout_type) + __u32_field(1, DRBD_F_REQUIRED, timeout_type) ) GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, - __flg_field(1, GENLA_F_MANDATORY, force_disconnect) + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) /* @@ -232,11 +232,11 @@ GENL_mc_group(events) /* kernel -> userspace announcement of changes */ GENL_notification( 
DRBD_EVENT, 1, events, - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY) ) /* query kernel for specific or all info */ @@ -250,116 +250,116 @@ GENL_op( ), /* To select the object .doit. * Or a subset of objects in .dumpit. */ - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) ) #if 0 /* TO BE DONE */ /* create or destroy resources, aka replication groups */ GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) #endif /* add DRBD minor devices as volumes to resources */ GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) /* add or delete replication links to resources */ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + 
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, GENL_doit(drbd_adm_resource_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_CONNECT, 10, GENL_doit(drbd_adm_connect), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_CHG_NET_OPTS, 29, GENL_doit(drbd_adm_net_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) ) GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_ATTACH, 12, GENL_doit(drbd_adm_attach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED) ) GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, GENL_doit(drbd_adm_disk_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_RESIZE, 13, 
GENL_doit(drbd_adm_resize), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_PRIMARY, 14, GENL_doit(drbd_adm_set_role), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_SECONDARY, 15, GENL_doit(drbd_adm_set_role), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_NEW_C_UUID, 16, GENL_doit(drbd_adm_new_c_uuid), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_START_OV, 17, GENL_doit(drbd_adm_start_ov), - GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_PAUSE_SYNC, 
21, GENL_doit(drbd_adm_pause_sync), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 2ae16126c6a..58edd403a3f 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -3,53 +3,6 @@ #include -/* - * Extension of genl attribute validation policies {{{1 - * {{{2 - */ - -/** - * nla_is_required - return true if this attribute is required - * @nla: netlink attribute - */ -static inline int nla_is_required(const struct nlattr *nla) -{ - return nla->nla_type & GENLA_F_REQUIRED; -} - -/** - * nla_is_mandatory - return true if understanding this attribute is mandatory - * @nla: netlink attribute - * Note: REQUIRED attributes are implicitly MANDATORY as well - */ -static inline int nla_is_mandatory(const 
struct nlattr *nla) -{ - return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); -} - -/* Functionality to be integrated into nla_parse(), and validate_nla(), - * respectively. - * - * Enforcing the "mandatory" bit is done here, - * by rejecting unknown mandatory attributes. - * - * Part of enforcing the "required" flag would mean to embed it into - * nla_policy.type, and extending validate_nla(), which currently does - * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, - * so we cannot do that. Thats why enforcing "required" is done in the - * generated assignment functions below. */ -static int nla_check_unknown(int maxtype, struct nlattr *head, int len) -{ - struct nlattr *nla; - int rem; - nla_for_each_attr(nla, head, len, rem) { - __u16 type = nla_type(nla); - if (type > maxtype && nla_is_mandatory(nla)) - return -EOPNOTSUPP; - } - return 0; -} - /* * Magic: declare tla policy {{{1 * Magic: declare nested policies @@ -80,13 +33,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ __put, __is_signed) \ - [__nla_type(attr_nr)] = { .type = nla_type }, + [attr_nr] = { .type = nla_type }, #undef __array #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ __get, __put, __is_signed) \ - [__nla_type(attr_nr)] = { .type = nla_type, \ - .len = maxlen - (nla_type == NLA_NUL_STRING) }, + [attr_nr] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, #include GENL_MAGIC_INCLUDE_FILE @@ -189,6 +142,43 @@ static struct nlattr *nested_attr_tb[128]; #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) #endif +static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not understand the 
DRBD_GENLA_F_MANDATORY flag. + * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. + */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + } + } + return 0; +} + +static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ /* *_from_attrs functions are static, but potentially unused */ \ @@ -204,12 +194,9 @@ static int __ ## s_name ## _from_attrs(struct s_name *s, \ if (!tla) \ return -ENOMSG; \ DPRINT_TLA(#s_name, "<=-", #tag_name); \ - err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ if (err) \ return err; \ - err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ - if (err) \ - return err; \ \ s_fields \ return 0; \ @@ -226,17 +213,17 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ } __attribute__((unused)) \ #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) 
\ - nla = ntb[__nla_type(attr_nr)]; \ + nla = ntb[attr_nr]; \ if (nla) { \ - if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ pr_info("<< must not change invariant attr: %s\n", #name); \ return -EEXIST; \ } \ assignment; \ - } else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + } else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ /* attribute missing from payload, */ \ /* which was expected */ \ - } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + } else if ((attr_flag) & DRBD_F_REQUIRED) { \ pr_info("<< missing attr: %s\n", #name); \ return -ENOMSG; \ } @@ -415,7 +402,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ __is_signed) \ - if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ __put(skb, attr_nr, s->name); \ } @@ -423,7 +410,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ __get, __put, __is_signed) \ - if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ __put(skb, attr_nr, min_t(int, maxlen, \ s->name ## _len + (nla_type == NLA_NUL_STRING)),\ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f3c3425ac30..1d0bd79e27b 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -26,50 +26,34 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * Extension of genl attribute validation policies {{{2 */ -/** - * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement +/* + * 
@DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not + * know about. This flag can be set in nlattr->nla_type to indicate that this + * attribute must not be ignored. * - * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. - * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. - * - * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. - * Without this, unknown attributes (> maxtype) are _silently_ ignored - * by validate_nla(). - * - * To be used for API extensions, so older kernel can reject requests for not - * yet implemented features, if newer userland tries to use them even though - * the genl_family version clearly indicates they are not available. - * - * NOTE: These flags overload - * NLA_F_NESTED (1 << 15) - * NLA_F_NET_BYTEORDER (1 << 14) - * from linux/netlink.h, which are not useful for validate_nla(): - * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting - * .type = NLA_NESTED in the appropriate policy. - * - * See also: nla_type() + * We check and remove this flag in drbd_nla_check_mandatory() before + * validating the attribute types and lengths via nla_parse_nested(). */ -enum { - GENLA_F_MANDATORY = 1 << 14, - GENLA_F_REQUIRED = 1 << 15, +#define DRBD_GENLA_F_MANDATORY (1 << 14) - /* Below will not be present in the __u16 .nla_type, but can be - * triggered on in _to_skb resp. _from_attrs */ +/* + * Flags specific to drbd and not visible at the netlink layer, used in + * _from_attrs and _to_skb: + * + * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is + * invalid. + * + * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be + * included in unpriviledged get requests or broadcasts. + * + * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but + * cannot subsequently be changed. 
+ */ +#define DRBD_F_REQUIRED (1 << 0) +#define DRBD_F_SENSITIVE (1 << 1) +#define DRBD_F_INVARIANT (1 << 2) - /* To exclude "sensitive" information from broadcasts, or on - * unpriviledged get requests. This is useful because genetlink - * multicast groups can be listened in on by anyone. */ - GENLA_F_SENSITIVE = 1 << 16, - - /* INVARIAN options cannot be changed at runtime. - * Useful to share an attribute policy and struct definition, - * between some "create" and "change" commands, - * but disallow certain fields to be changed online. - */ - GENLA_F_INVARIANT = 1 << 17, -}; - -#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) +#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY)) /* }}}1 * MAGIC @@ -170,12 +154,12 @@ enum { \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, type, \ __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | attr_flag), + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, \ maxlen, __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | attr_flag), + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), #include GENL_MAGIC_INCLUDE_FILE From 5084d71d89e1a94193378efb12ac659e4e6ada3f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 24 May 2011 14:08:58 +0200 Subject: [PATCH 393/609] drbd: drbd_nla_check_mandatory(): Need to remove the DRBD_GENLA_F_MANDATORY flag first We need to remove the flag before checking for valid types. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_func.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 58edd403a3f..357f2ad403b 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -158,9 +158,9 @@ static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) nla_for_each_attr(nla, head, len, rem) { if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; if (nla_type(nla) > maxtype) return -EOPNOTSUPP; - nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; } } return 0; From 1e2a2551ee8fbbac082eb4c14af1289e7aa880f0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 24 May 2011 14:17:08 +0200 Subject: [PATCH 394/609] drbd: drbd_adm_prepare(): Pass through error codes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b81f924c47b..c9ecb7b04c1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -156,15 +156,19 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, return -EPERM; adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!adm_ctx.reply_skb) + if (!adm_ctx.reply_skb) { + err = -ENOMEM; goto fail; + } adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb, info, &drbd_genl_family, 0, cmd); /* put of a few bytes into a fresh skb of >= 4k will always succeed. 
* but anyways */ - if (!adm_ctx.reply_dh) + if (!adm_ctx.reply_dh) { + err = -ENOMEM; goto fail; + } adm_ctx.reply_dh->minor = d_in->minor; adm_ctx.reply_dh->ret_code = NO_ERROR; @@ -229,7 +233,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, fail: nlmsg_free(adm_ctx.reply_skb); adm_ctx.reply_skb = NULL; - return -ENOMEM; + return err; } static int drbd_adm_finish(struct genl_info *info, int retcode) From c75b9b10e7f09bbf9660dd20a675e0a8069bd529 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 24 May 2011 14:18:31 +0200 Subject: [PATCH 395/609] drbd: Don't use empty nested netlink attributes Before mainline commit ea5693cc (v2.6.29-rc1), empty nested netlink attributes were not allowed. Fix that by leaving out nested attributes if they are empty and by allowing the top-level attributes to be missing. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c9ecb7b04c1..59923db780b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1155,7 +1155,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) set_disk_conf_defaults(new_disk_conf); err = disk_conf_from_attrs_for_change(new_disk_conf, info); - if (err) { + if (err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); } @@ -1902,7 +1902,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) set_net_conf_defaults(new_conf); err = net_conf_from_attrs_for_change(new_conf, info); - if (err) { + if (err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; @@ -2337,7 +2337,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) set_res_opts_defaults(&res_opts); err = res_opts_from_attrs(&res_opts, info); - if (err) { + if 
(err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; From a67b813cfa6f7d84bc3958558be9b87b5b3ea533 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 25 May 2011 11:03:04 +0200 Subject: [PATCH 396/609] drbd: Lower log priority for an event that is definitely not an error Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 96113032b03..4ba09729327 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1084,7 +1084,7 @@ static void drbd_flush(struct drbd_conf *mdev) rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, NULL); if (rv) { - dev_err(DEV, "local disk flush failed with status %d\n", rv); + dev_info(DEV, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. 
* don't try again for ANY return value != 0 * if (rv == -EOPNOTSUPP) */ From 46530e859c730984967b6e9e9cac722470096e18 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 31 May 2011 13:08:53 +0200 Subject: [PATCH 397/609] drbd: Use DRBD_MINOR_COUNT_DEF in one more place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dab0a75c63b..178c711bc4a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2669,7 +2669,7 @@ int __init drbd_init(void) #ifdef MODULE return -EINVAL; #else - minor_count = 8; + minor_count = DRBD_MINOR_COUNT_DEF; #endif } From fc251d5c2466413fdd6851e6c3f63e9851bf9d84 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 3 Jun 2011 21:13:17 +0200 Subject: [PATCH 398/609] drbd: cosmetic: fix accidental division instead of modulo when pretty printing For large resync rates, seq_printf_with_thousands_grouping() accidentally only produced Y,000,00Y, instead of the real numbers. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 6b226cca1e8..7e68d99e9c9 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -52,7 +52,7 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) if (unlikely(v >= 1000000)) { /* cool: > GiByte/s */ seq_printf(seq, "%ld,", v / 1000000); - v /= 1000000; + v %= 1000000; seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); } else if (likely(v >= 1000)) seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); From 67b58bf723b083d4776cd7c9959246ef46c0d36f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 6 Jun 2011 15:36:04 +0200 Subject: [PATCH 399/609] drbd: spelling fix: too small It is not "to small", but "too small". Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 ++++---- include/linux/drbd.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 59923db780b..31d27dd9292 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1360,7 +1360,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) new_disk_conf->disk_size); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1374,7 +1374,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { - retcode = ERR_MD_DISK_TO_SMALL; + retcode = ERR_MD_DISK_TOO_SMALL; dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); @@ -1385,7 +1385,7 @@ int 
drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * (we may currently be R_PRIMARY with no local disk...) */ if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1447,7 +1447,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto force_diskless_dec; } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 679e8112322..fedda00374a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -112,8 +112,8 @@ enum drbd_ret_code { ERR_OPEN_MD_DISK = 105, ERR_DISK_NOT_BDEV = 107, ERR_MD_NOT_BDEV = 108, - ERR_DISK_TO_SMALL = 111, - ERR_MD_DISK_TO_SMALL = 112, + ERR_DISK_TOO_SMALL = 111, + ERR_MD_DISK_TOO_SMALL = 112, ERR_BDCLAIM_DISK = 114, ERR_BDCLAIM_MD_DISK = 115, ERR_MD_IDX_INVALID = 116, From 789c1b626cb490acb36cf481b45040b324f60fde Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 6 Jun 2011 16:16:44 +0200 Subject: [PATCH 400/609] drbd: Use the terminology suggested by the command names in the source code and messages Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 16 ++++++++-------- include/linux/drbd.h | 4 ++-- include/linux/drbd_genl.h | 17 ++++------------- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 31d27dd9292..5b4090f52f5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -47,8 +47,8 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info 
*info); -int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info); int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); @@ -2972,7 +2972,7 @@ drbd_check_conn_name(const char *name) return NO_ERROR; } -int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -2989,7 +2989,7 @@ int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) if (adm_ctx.tconn) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { retcode = ERR_INVALID_REQUEST; - drbd_msg_put_info("connection exists"); + drbd_msg_put_info("resource exists"); } /* else: still NO_ERROR */ goto out; @@ -3086,7 +3086,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out; if (!adm_ctx.tconn) { - retcode = ERR_CONN_NOT_KNOWN; + retcode = ERR_RES_NOT_KNOWN; goto out; } @@ -3140,7 +3140,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) retcode = NO_ERROR; } else { /* "can not happen" */ - retcode = ERR_CONN_IN_USE; + retcode = ERR_RES_IN_USE; drbd_msg_put_info("failed to delete connection"); } goto out; @@ -3149,7 +3149,7 @@ out: return 0; } -int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -3166,7 +3166,7 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) retcode = NO_ERROR; } else { - retcode = ERR_CONN_IN_USE; + retcode = ERR_RES_IN_USE; } if (retcode == NO_ERROR) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index fedda00374a..161cd414b03 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,8 +155,8 @@ enum 
drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, - ERR_CONN_NOT_KNOWN = 158, - ERR_CONN_IN_USE = 159, + ERR_RES_NOT_KNOWN = 158, + ERR_RES_IN_USE = 159, ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 4ceecb9307d..47ef324b69d 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -253,25 +253,16 @@ GENL_op( GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) ) -#if 0 - /* TO BE DONE */ - /* create or destroy resources, aka replication groups */ -GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -#endif - /* add DRBD minor devices as volumes to resources */ -GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), +GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) - /* add or delete replication links to resources */ -GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + /* add or delete resources */ +GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), +GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, From 7c3063cc6f0e75cdf312f5f318f9a4c02e460397 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 9 Jun 2011 17:52:12 +0200 Subject: [PATCH 401/609] drbd: Also need to check for DRBD_GENLA_F_MANDATORY flags 
before nla_find_nested() This is done by introducing drbd_nla_find_nested() which handles the flag before calling nla_find_nested(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++ drivers/block/drbd/drbd_nl.c | 96 ++++++++++++++++++++++++++------- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 37 ------------- 4 files changed, 83 insertions(+), 58 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c3019730a24..c58430183d5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1407,6 +1407,12 @@ extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); +struct nla_policy; +extern int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla); +extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy); +extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype); + /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5b4090f52f5..24187f1c93d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -92,7 +92,7 @@ static struct drbd_config_context { #define VOLUME_UNSPECIFIED (-1U) /* pointer into the request skb, * limited lifetime! */ - char *conn_name; + char *resource_name; /* reply buffer */ struct sk_buff *reply_skb; @@ -191,15 +191,15 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, /* and assign stuff to the global adm_ctx */ nla = nested_attr_tb[__nla_type(T_ctx_volume)]; adm_ctx.volume = nla ? 
nla_get_u32(nla) : VOLUME_UNSPECIFIED; - nla = nested_attr_tb[__nla_type(T_ctx_conn_name)]; + nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; if (nla) - adm_ctx.conn_name = nla_data(nla); + adm_ctx.resource_name = nla_data(nla); } else adm_ctx.volume = VOLUME_UNSPECIFIED; adm_ctx.minor = d_in->minor; adm_ctx.mdev = minor_to_mdev(d_in->minor); - adm_ctx.tconn = conn_get_by_name(adm_ctx.conn_name); + adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name); if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { drbd_msg_put_info("unknown minor"); @@ -214,7 +214,8 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, if (adm_ctx.mdev && adm_ctx.tconn && adm_ctx.mdev->tconn != adm_ctx.tconn) { pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", - adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); + adm_ctx.minor, adm_ctx.resource_name, + adm_ctx.mdev->tconn->name); drbd_msg_put_info("minor exists in different connection"); return ERR_INVALID_REQUEST; } @@ -239,7 +240,7 @@ fail: static int drbd_adm_finish(struct genl_info *info, int retcode) { struct nlattr *nla; - const char *conn_name = NULL; + const char *resource_name = NULL; if (adm_ctx.tconn) { kref_put(&adm_ctx.tconn->kref, &conn_destroy); @@ -253,9 +254,10 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) nla = info->attrs[DRBD_NLA_CFG_CONTEXT]; if (nla) { - nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); - if (nla) - conn_name = nla_data(nla); + int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; + nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); + if (nla && !IS_ERR(nla)) + resource_name = nla_data(nla); } drbd_adm_send_reply(adm_ctx.reply_skb, info); @@ -2526,7 +2528,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, 
unsigned vnr) +int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr) { struct nlattr *nla; nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); @@ -2534,7 +2536,7 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigne goto nla_put_failure; if (vnr != VOLUME_UNSPECIFIED) NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name); + NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name); nla_nest_end(skb, nla); return 0; @@ -2778,8 +2780,9 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; struct nlattr *nla; - const char *conn_name; + const char *resource_name; struct drbd_tconn *tconn; + int maxtype; /* Is this a followup call? */ if (cb->args[0]) { @@ -2799,12 +2802,15 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) /* No explicit context given. Dump all. */ if (!nla) goto dump; - nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); + maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; + nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); + if (IS_ERR(nla)) + return PTR_ERR(nla); /* context given, but no name present? 
*/ if (!nla) return -EINVAL; - conn_name = nla_data(nla); - tconn = conn_get_by_name(conn_name); + resource_name = nla_data(nla); + tconn = conn_get_by_name(resource_name); if (!tconn) return -ENODEV; @@ -2957,16 +2963,16 @@ out_nolock: } static enum drbd_ret_code -drbd_check_conn_name(const char *name) +drbd_check_resource_name(const char *name) { if (!name || !name[0]) { - drbd_msg_put_info("connection name missing"); + drbd_msg_put_info("resource name missing"); return ERR_MANDATORY_TAG; } /* if we want to use these in sysfs/configfs/debugfs some day, * we must not allow slashes */ if (strchr(name, '/')) { - drbd_msg_put_info("invalid connection name"); + drbd_msg_put_info("invalid resource name"); return ERR_INVALID_REQUEST; } return NO_ERROR; @@ -2982,7 +2988,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = drbd_check_conn_name(adm_ctx.conn_name); + retcode = drbd_check_resource_name(adm_ctx.resource_name); if (retcode != NO_ERROR) goto out; @@ -2995,7 +3001,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!conn_create(adm_ctx.conn_name)) + if (!conn_create(adm_ctx.resource_name)) retcode = ERR_NOMEM; out: drbd_adm_finish(info, retcode); @@ -3213,3 +3219,53 @@ failed: "Event seq:%u sib_reason:%u\n", err, seq, sib->sib_reason); } + +int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. + * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. 
+ */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + } + } + return 0; +} + +int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + +struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype) +{ + int err; + /* + * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and + * we don't know about that attribute, reject all the nested + * attributes. + */ + err = drbd_nla_check_mandatory(maxtype, nla); + if (err) + return ERR_PTR(err); + return nla_find_nested(nla, attrtype); +} diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 47ef324b69d..0c2102c0538 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -96,7 +96,7 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and the volume id within the resource. 
*/ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) - __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_conn_name, 128) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 357f2ad403b..0b8a88e2e83 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -142,43 +142,6 @@ static struct nlattr *nested_attr_tb[128]; #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) #endif -static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) -{ - struct nlattr *head = nla_data(nla); - int len = nla_len(nla); - int rem; - - /* - * validate_nla (called from nla_parse_nested) ignores attributes - * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. - * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY - * flag set also, check and remove that flag before calling - * nla_parse_nested. 
- */ - - nla_for_each_attr(nla, head, len, rem) { - if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { - nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; - if (nla_type(nla) > maxtype) - return -EOPNOTSUPP; - } - } - return 0; -} - -static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, - struct nlattr *nla, - const struct nla_policy *policy) -{ - int err; - - err = drbd_nla_check_mandatory(maxtype, nla); - if (!err) - err = nla_parse_nested(tb, maxtype, nla, policy); - - return err; -} - #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ /* *_from_attrs functions are static, but potentially unused */ \ From 01b39b50d34733646fe46a582fa60d3b53f6180d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 10 Jun 2011 12:57:26 +0200 Subject: [PATCH 402/609] drbd: Split off netlink mandatory attribute handling into separate file Duplicate this file in the kernel module and in user space; both sides need it. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Makefile | 1 + drivers/block/drbd/drbd_int.h | 6 ---- drivers/block/drbd/drbd_nl.c | 51 +-------------------------- drivers/block/drbd/drbd_nla.c | 55 ++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_nla.h | 8 +++++ drivers/block/drbd/drbd_wrappers.h | 1 + 6 files changed, 66 insertions(+), 56 deletions(-) create mode 100644 drivers/block/drbd/drbd_nla.c create mode 100644 drivers/block/drbd/drbd_nla.h diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 06fb4453734..8b450338075 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -2,5 +2,6 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o drbd-y += drbd_interval.o drbd_state.o +drbd-y += drbd_nla.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 
c58430183d5..c3019730a24 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1407,12 +1407,6 @@ extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); -struct nla_policy; -extern int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla); -extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, - const struct nla_policy *policy); -extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype); - /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 24187f1c93d..45bb5cebb59 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -75,6 +75,7 @@ int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); #include +#include "drbd_nla.h" #include /* used blkdev_get_by_path, to claim our meta data device(s) */ @@ -3219,53 +3220,3 @@ failed: "Event seq:%u sib_reason:%u\n", err, seq, sib->sib_reason); } - -int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) -{ - struct nlattr *head = nla_data(nla); - int len = nla_len(nla); - int rem; - - /* - * validate_nla (called from nla_parse_nested) ignores attributes - * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. - * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY - * flag set also, check and remove that flag before calling - * nla_parse_nested. 
- */ - - nla_for_each_attr(nla, head, len, rem) { - if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { - nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; - if (nla_type(nla) > maxtype) - return -EOPNOTSUPP; - } - } - return 0; -} - -int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, - const struct nla_policy *policy) -{ - int err; - - err = drbd_nla_check_mandatory(maxtype, nla); - if (!err) - err = nla_parse_nested(tb, maxtype, nla, policy); - - return err; -} - -struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype) -{ - int err; - /* - * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and - * we don't know about that attribute, reject all the nested - * attributes. - */ - err = drbd_nla_check_mandatory(maxtype, nla); - if (err) - return ERR_PTR(err); - return nla_find_nested(nla, attrtype); -} diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c new file mode 100644 index 00000000000..fa672b6df8d --- /dev/null +++ b/drivers/block/drbd/drbd_nla.c @@ -0,0 +1,55 @@ +#include "drbd_wrappers.h" +#include +#include +#include +#include "drbd_nla.h" + +static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. + * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. 
+ */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + } + } + return 0; +} + +int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + +struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype) +{ + int err; + /* + * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and + * we don't know about that attribute, reject all the nested + * attributes. + */ + err = drbd_nla_check_mandatory(maxtype, nla); + if (err) + return ERR_PTR(err); + return nla_find_nested(nla, attrtype); +} diff --git a/drivers/block/drbd/drbd_nla.h b/drivers/block/drbd/drbd_nla.h new file mode 100644 index 00000000000..679c2d5b453 --- /dev/null +++ b/drivers/block/drbd/drbd_nla.h @@ -0,0 +1,8 @@ +#ifndef __DRBD_NLA_H +#define __DRBD_NLA_H + +extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy); +extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype); + +#endif /* __DRBD_NLA_H */ diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 46a6d99f7b6..328f18e4b4e 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -3,6 +3,7 @@ #include #include +#include "drbd_int.h" /* see get_sb_bdev and bd_claim */ extern char *drbd_sec_holder; From 44e52cfaa22e44a0197b44cd72c3440bc2a6e1ed Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 14 Jun 2011 16:07:32 +0200 Subject: [PATCH 403/609] drbd: Rename DRBD_ADM_NEED_{CONN -> RESOURCE} Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 22 
+++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 45bb5cebb59..352be132b4b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -141,7 +141,7 @@ int drbd_msg_put_info(const char *info) * If it returns successfully, adm_ctx members are valid. */ #define DRBD_ADM_NEED_MINOR 1 -#define DRBD_ADM_NEED_CONN 2 +#define DRBD_ADM_NEED_RESOURCE 2 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, unsigned flags) { @@ -206,18 +206,18 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, drbd_msg_put_info("unknown minor"); return ERR_MINOR_INVALID; } - if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) { - drbd_msg_put_info("unknown connection"); + if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_RESOURCE)) { + drbd_msg_put_info("unknown resource"); return ERR_INVALID_REQUEST; } /* some more paranoia, if the request was over-determined */ if (adm_ctx.mdev && adm_ctx.tconn && adm_ctx.mdev->tconn != adm_ctx.tconn) { - pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", + pr_warning("request: minor=%u, resource=%s; but that minor belongs to connection %s\n", adm_ctx.minor, adm_ctx.resource_name, adm_ctx.mdev->tconn->name); - drbd_msg_put_info("minor exists in different connection"); + drbd_msg_put_info("minor exists in different resource"); return ERR_INVALID_REQUEST; } if (adm_ctx.mdev && @@ -1874,7 +1874,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int rsr; /* re-sync running */ struct crypto crypto = { }; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -1993,7 +1993,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) int i; int err; - retcode = drbd_adm_prepare(skb, info, 
DRBD_ADM_NEED_CONN); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -2170,7 +2170,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -2322,7 +2322,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) struct res_opts res_opts; int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -3014,7 +3014,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) struct drbd_genlmsghdr *dh = info->userhdr; enum drbd_ret_code retcode; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -3160,7 +3160,7 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) From 089c075d88ac9407b8d7c5c8fc4b21c0d940bd82 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 14 Jun 2011 18:28:09 +0200 Subject: [PATCH 404/609] drbd: Convert the generic netlink interface to accept connection endpoints Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 ++ drivers/block/drbd/drbd_main.c | 21 ++++ drivers/block/drbd/drbd_nl.c | 158 +++++++++++++++++------------ drivers/block/drbd/drbd_receiver.c | 21 ++-- drivers/block/drbd/drbd_state.c | 2 + 
include/linux/drbd_genl.h | 62 +++++------ 6 files changed, 164 insertions(+), 107 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c3019730a24..6d6d1056d82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -836,6 +836,11 @@ struct drbd_tconn { /* is a resource from the config file */ wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ struct res_opts res_opts; + struct sockaddr_storage my_addr; + int my_addr_len; + struct sockaddr_storage peer_addr; + int peer_addr_len; + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ int agreed_pro_version; /* actually used protocol version */ @@ -1377,6 +1382,8 @@ extern void drbd_minor_destroy(struct kref *kref); struct drbd_tconn *conn_create(const char *name); extern void conn_destroy(struct kref *kref); struct drbd_tconn *conn_get_by_name(const char *name); +extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len); extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 178c711bc4a..79f275dc43a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2420,6 +2420,27 @@ found: return tconn; } +struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len) +{ + struct drbd_tconn *tconn; + + rcu_read_lock(); + list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { + if (tconn->my_addr_len == my_addr_len && + tconn->peer_addr_len == peer_addr_len && + !memcmp(&tconn->my_addr, my_addr, my_addr_len) && + !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) { + kref_get(&tconn->kref); + goto found; + } + } + tconn = NULL; +found: + rcu_read_unlock(); + return tconn; +} + static int drbd_alloc_socket(struct 
drbd_socket *socket) { socket->rbuf = (void *) __get_free_page(GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 352be132b4b..e7933e04e7b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -94,6 +94,8 @@ static struct drbd_config_context { /* pointer into the request skb, * limited lifetime! */ char *resource_name; + struct nlattr *my_addr; + struct nlattr *peer_addr; /* reply buffer */ struct sk_buff *reply_skb; @@ -142,6 +144,7 @@ int drbd_msg_put_info(const char *info) */ #define DRBD_ADM_NEED_MINOR 1 #define DRBD_ADM_NEED_RESOURCE 2 +#define DRBD_ADM_NEED_CONNECTION 4 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, unsigned flags) { @@ -174,6 +177,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, adm_ctx.reply_dh->minor = d_in->minor; adm_ctx.reply_dh->ret_code = NO_ERROR; + adm_ctx.volume = VOLUME_UNSPECIFIED; if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { struct nlattr *nla; /* parse and validate only */ @@ -191,12 +195,21 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, /* and assign stuff to the global adm_ctx */ nla = nested_attr_tb[__nla_type(T_ctx_volume)]; - adm_ctx.volume = nla ? 
nla_get_u32(nla) : VOLUME_UNSPECIFIED; + if (nla) + adm_ctx.volume = nla_get_u32(nla); nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; if (nla) adm_ctx.resource_name = nla_data(nla); - } else - adm_ctx.volume = VOLUME_UNSPECIFIED; + adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)]; + adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)]; + if ((adm_ctx.my_addr && + nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) || + (adm_ctx.peer_addr && + nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) { + err = -EINVAL; + goto fail; + } + } adm_ctx.minor = d_in->minor; adm_ctx.mdev = minor_to_mdev(d_in->minor); @@ -211,6 +224,26 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, return ERR_INVALID_REQUEST; } + if (flags & DRBD_ADM_NEED_CONNECTION) { + if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) { + drbd_msg_put_info("no resource name expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev) { + drbd_msg_put_info("no minor number expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.my_addr && adm_ctx.peer_addr) + adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr), + nla_len(adm_ctx.my_addr), + nla_data(adm_ctx.peer_addr), + nla_len(adm_ctx.peer_addr)); + if (!adm_ctx.tconn) { + drbd_msg_put_info("unknown connection"); + return ERR_INVALID_REQUEST; + } + } + /* some more paranoia, if the request was over-determined */ if (adm_ctx.mdev && adm_ctx.tconn && adm_ctx.mdev->tconn != adm_ctx.tconn) { @@ -268,30 +301,28 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) static void setup_khelper_env(struct drbd_tconn *tconn, char **envp) { char *afs; - struct net_conf *nc; - rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); - if (nc) { - switch (((struct sockaddr *)nc->peer_addr)->sa_family) { - case AF_INET6: - afs = "ipv6"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)nc->peer_addr)->sin6_addr); - break; - case 
AF_INET: - afs = "ipv4"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)nc->peer_addr)->sin_addr); - break; - default: - afs = "ssocks"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)nc->peer_addr)->sin_addr); - } - snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); + /* FIXME: A future version will not allow this case. */ + if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0) + return; + + switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) { + case AF_INET6: + afs = "ipv6"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", + &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr); + break; + case AF_INET: + afs = "ipv4"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); + break; + default: + afs = "ssocks"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); } - rcu_read_unlock(); + snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); } int drbd_khelper(struct drbd_conf *mdev, char *cmd) @@ -1874,7 +1905,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int rsr; /* re-sync running */ struct crypto crypto = { }; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -1986,18 +2017,39 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) struct drbd_conf *mdev; struct net_conf *old_conf, *new_conf = NULL; struct crypto crypto = { }; - struct drbd_tconn *oconn; struct drbd_tconn *tconn; - struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; enum drbd_ret_code retcode; int i; int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto out; + if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) { + drbd_msg_put_info("connection endpoint(s) 
missing"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + /* No need for _rcu here. All reconfiguration is + * strictly serialized on genl_lock(). We are protected against + * concurrent reconfiguration/addition/deletion */ + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len && + !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) { + retcode = ERR_LOCAL_ADDR; + goto out; + } + + if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len && + !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) { + retcode = ERR_PEER_ADDR; + goto out; + } + } tconn = adm_ctx.tconn; conn_reconfig_start(tconn); @@ -2027,37 +2079,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; - retcode = NO_ERROR; - - new_my_addr = (struct sockaddr *)&new_conf->my_addr; - new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - - /* No need for _rcu here. All reconfiguration is - * strictly serialized on genl_lock(). 
We are protected against - * concurrent reconfiguration/addition/deletion */ - list_for_each_entry(oconn, &drbd_tconns, all_tconn) { - struct net_conf *nc; - if (oconn == tconn) - continue; - - rcu_read_lock(); - nc = rcu_dereference(oconn->net_conf); - if (nc) { - taken_addr = (struct sockaddr *)&nc->my_addr; - if (new_conf->my_addr_len == nc->my_addr_len && - !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) - retcode = ERR_LOCAL_ADDR; - - taken_addr = (struct sockaddr *)&nc->peer_addr; - if (new_conf->peer_addr_len == nc->peer_addr_len && - !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) - retcode = ERR_PEER_ADDR; - } - rcu_read_unlock(); - if (retcode != NO_ERROR) - goto fail; - } - retcode = alloc_crypto(&crypto, new_conf); if (retcode != NO_ERROR) goto fail; @@ -2083,6 +2104,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) tconn->csums_tfm = crypto.csums_tfm; tconn->verify_tfm = crypto.verify_tfm; + tconn->my_addr_len = nla_len(adm_ctx.my_addr); + memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len); + tconn->peer_addr_len = nla_len(adm_ctx.peer_addr); + memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len); + mutex_unlock(&tconn->conf_update); rcu_read_lock(); @@ -2170,7 +2196,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -2529,7 +2555,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr) +int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr) { struct nlattr *nla; nla = nla_nest_start(skb, 
DRBD_NLA_CFG_CONTEXT); @@ -2537,7 +2563,11 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, uns goto nla_put_failure; if (vnr != VOLUME_UNSPECIFIED) NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name); + NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name); + if (tconn->my_addr_len) + NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr); + if (tconn->peer_addr_len) + NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr); nla_nest_end(skb, nla); return 0; @@ -2574,7 +2604,7 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, /* We need to add connection name and volume number information still. * Minor number is in drbd_genlmsghdr. */ - if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) + if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr)) goto nla_put_failure; if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) @@ -2736,7 +2766,7 @@ next_tconn: /* this is a tconn without a single volume */ dh->minor = -1U; dh->ret_code = NO_ERROR; - if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED)) + if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED)) genlmsg_cancel(skb, dh); else genlmsg_end(skb, dh); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4ba09729327..ab1d36cb621 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -626,23 +626,21 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) rcu_read_unlock(); return NULL; } - sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; connect_int = nc->connect_int; + rcu_read_unlock(); - my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6)); - memcpy(&src_in6, nc->my_addr, my_addr_len); + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6)); + memcpy(&src_in6, &tconn->my_addr, my_addr_len); - if (((struct sockaddr 
*)nc->my_addr)->sa_family == AF_INET6) + if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ - peer_addr_len = min_t(int, nc->peer_addr_len, sizeof(src_in6)); - memcpy(&peer_in6, nc->peer_addr, peer_addr_len); - - rcu_read_unlock(); + peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6)); + memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len); what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, @@ -714,15 +712,14 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) rcu_read_unlock(); return NULL; } - sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; connect_int = nc->connect_int; - - my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6)); - memcpy(&my_addr, nc->my_addr, my_addr_len); rcu_read_unlock(); + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6)); + memcpy(&my_addr, &tconn->my_addr, my_addr_len); + what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index cd55f46d5c5..d978e4d98a1 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1418,6 +1418,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) mutex_lock(&tconn->conf_update); old_conf = tconn->net_conf; + tconn->my_addr_len = 0; + tconn->peer_addr_len = 0; rcu_assign_pointer(tconn->net_conf, NULL); conn_free_crypto(tconn); mutex_unlock(&tconn->conf_update); diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 0c2102c0538..b93db6c8388 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -97,6 +97,8 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, __u32_field(1, 
DRBD_GENLA_F_MANDATORY, ctx_volume) __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) + __bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128) + __bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, @@ -134,38 +136,36 @@ GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __bin_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, my_addr, 128) - __bin_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_addr, 128) - __str_field_def(3, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, + __str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field_def(4, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field_def(5, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field_def(6, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field_def(7, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field_def(8, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) - __u32_field_def(10, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) - __u32_field_def(11, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) - __u32_field_def(12, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) - __u32_field_def(13, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) - __u32_field_def(14, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) - __u32_field_def(15, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) - __u32_field_def(16, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) - __u32_field_def(17, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) - __u32_field_def(18, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) - __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) - __u32_field_def(20, DRBD_GENLA_F_MANDATORY, 
after_sb_1p, DRBD_AFTER_SB_1P_DEF) - __u32_field_def(21, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) - __u32_field_def(22, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) - __u32_field_def(23, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) - __u32_field_def(24, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) - __u32_field_def(25, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) - __flg_field_def(28, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) - __flg_field_def(29, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) - __flg_field_def(31, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + __str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, 
DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, From afbbfa88bc506bc13b957c3811bce299de1d31d5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 16 Jun 2011 17:58:02 +0200 Subject: [PATCH 405/609] drbd: Allow to pass resource options to the new-resource command This is equivalent to how the attach and connect commands work. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +-- drivers/block/drbd/drbd_main.c | 46 ++++++++++++++++++++++++++++-- drivers/block/drbd/drbd_nl.c | 51 +++++++++++----------------------- 3 files changed, 61 insertions(+), 40 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6d6d1056d82..ece2e4a991f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1379,7 +1379,8 @@ extern int conn_lowest_minor(struct drbd_tconn *tconn); enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_minor_destroy(struct kref *kref); -struct drbd_tconn *conn_create(const char *name); +extern int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts); +extern struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts); extern void conn_destroy(struct kref *kref); struct drbd_tconn *conn_get_by_name(const char *name); extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, @@ -1397,7 +1398,6 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ -extern void drbd_set_res_opts_defaults(struct res_opts *r); extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 79f275dc43a..933d4767c11 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2479,8 +2479,47 @@ void conn_free_crypto(struct drbd_tconn *tconn) tconn->int_dig_vv = NULL; } +int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts) +{ + cpumask_var_t new_cpu_mask; + int err; + + if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) + return -ENOMEM; + /* + retcode = ERR_NOMEM; + drbd_msg_put_info("unable to allocate cpumask"); + */ + + /* silently 
ignore cpu mask on UP kernel */ + if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { + /* FIXME: Get rid of constant 32 here */ + err = __bitmap_parse(res_opts->cpu_mask, 32, 0, + cpumask_bits(new_cpu_mask), nr_cpu_ids); + if (err) { + conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); + /* retcode = ERR_CPU_MASK_PARSE; */ + goto fail; + } + } + tconn->res_opts = *res_opts; + if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(tconn); + tconn->receiver.reset_cpu_mask = 1; + tconn->asender.reset_cpu_mask = 1; + tconn->worker.reset_cpu_mask = 1; + } + err = 0; + +fail: + free_cpumask_var(new_cpu_mask); + return err; + +} + /* caller must be under genl_lock() */ -struct drbd_tconn *conn_create(const char *name) +struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) { struct drbd_tconn *tconn; @@ -2500,6 +2539,9 @@ struct drbd_tconn *conn_create(const char *name) if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) goto fail; + if (set_resource_options(tconn, res_opts)) + goto fail; + if (!tl_init(tconn)) goto fail; @@ -2520,8 +2562,6 @@ struct drbd_tconn *conn_create(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); - drbd_set_res_opts_defaults(&tconn->res_opts); - kref_init(&tconn->kref); list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e7933e04e7b..930af5dbfa7 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2335,15 +2335,9 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) return 0; } -void drbd_set_res_opts_defaults(struct res_opts *r) -{ - return set_res_opts_defaults(r); -} - int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; - cpumask_var_t new_cpu_mask; struct drbd_tconn *tconn; 
struct res_opts res_opts; int err; @@ -2355,12 +2349,6 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) goto fail; tconn = adm_ctx.tconn; - if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { - retcode = ERR_NOMEM; - drbd_msg_put_info("unable to allocate cpumask"); - goto fail; - } - res_opts = tconn->res_opts; if (should_set_defaults(info)) set_res_opts_defaults(&res_opts); @@ -2372,31 +2360,14 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) goto fail; } - /* silently ignore cpu mask on UP kernel */ - if (nr_cpu_ids > 1 && res_opts.cpu_mask[0] != 0) { - err = __bitmap_parse(res_opts.cpu_mask, 32, 0, - cpumask_bits(new_cpu_mask), nr_cpu_ids); - if (err) { - conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); - retcode = ERR_CPU_MASK_PARSE; - goto fail; - } - } - - - tconn->res_opts = res_opts; - - if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { - cpumask_copy(tconn->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(tconn); - tconn->receiver.reset_cpu_mask = 1; - tconn->asender.reset_cpu_mask = 1; - tconn->worker.reset_cpu_mask = 1; + err = set_resource_options(tconn, &res_opts); + if (err) { + retcode = ERR_INVALID_REQUEST; + if (err == -ENOMEM) + retcode = ERR_NOMEM; } fail: - free_cpumask_var(new_cpu_mask); - drbd_adm_finish(info, retcode); return 0; } @@ -3012,6 +2983,8 @@ drbd_check_resource_name(const char *name) int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; + struct res_opts res_opts; + int err; retcode = drbd_adm_prepare(skb, info, 0); if (!adm_ctx.reply_skb) @@ -3019,6 +2992,14 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; + set_res_opts_defaults(&res_opts); + err = res_opts_from_attrs(&res_opts, info); + if (err && err != -ENOMSG) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + retcode = 
drbd_check_resource_name(adm_ctx.resource_name); if (retcode != NO_ERROR) goto out; @@ -3032,7 +3013,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!conn_create(adm_ctx.resource_name)) + if (!conn_create(adm_ctx.resource_name, &res_opts)) retcode = ERR_NOMEM; out: drbd_adm_finish(info, retcode); From d0fa7fd680a56a3fd52817fe93c1c8abe7164ef1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 21 Jun 2011 12:58:22 +0200 Subject: [PATCH 406/609] drbd: Remove dead code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 930af5dbfa7..f92346296ae 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -273,9 +273,6 @@ fail: static int drbd_adm_finish(struct genl_info *info, int retcode) { - struct nlattr *nla; - const char *resource_name = NULL; - if (adm_ctx.tconn) { kref_put(&adm_ctx.tconn->kref, &conn_destroy); adm_ctx.tconn = NULL; @@ -285,15 +282,6 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) return -ENOMEM; adm_ctx.reply_dh->ret_code = retcode; - - nla = info->attrs[DRBD_NLA_CFG_CONTEXT]; - if (nla) { - int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; - nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); - if (nla && !IS_ERR(nla)) - resource_name = nla_data(nla); - } - drbd_adm_send_reply(adm_ctx.reply_skb, info); return 0; } From 6dff2902208364d058746ee794da4d960f6eec6f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 28 Jun 2011 14:18:12 +0200 Subject: [PATCH 407/609] drbd: Rename --dry-run to --tentative drbdadm already has a --dry-run option, so this option cannot directly be passed through to drbdsetup. Rename the drbdsetup option to resolve this conflict. For backward compatibility, make --dry-run an alias of --tentative. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 6 +++--- include/linux/drbd_genl.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 933d4767c11..72b1dfa4b65 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -926,7 +926,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - if (nc->dry_run && tconn->agreed_pro_version < 92) { + if (nc->tentative && tconn->agreed_pro_version < 92) { rcu_read_unlock(); mutex_unlock(&sock->mutex); conn_err(tconn, "--dry-run is not supported by peer"); @@ -945,7 +945,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) cf = 0; if (nc->discard_my_data) cf |= CF_DISCARD_MY_DATA; - if (nc->dry_run) + if (nc->tentative) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ab1d36cb621..d55a3cb21c3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2836,7 +2836,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; struct net_conf *nc; - int hg, rule_nr, rr_conflict, dry_run; + int hg, rule_nr, rr_conflict, tentative; mydisk = mdev->state.disk; if (mydisk == D_NEGOTIATING) @@ -2916,7 +2916,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol (hg < 0) ? 
"peer" : "this"); } rr_conflict = nc->rr_conflict; - dry_run = nc->dry_run; + tentative = nc->tentative; rcu_read_unlock(); if (hg == -100) { @@ -2949,7 +2949,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { + if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index b93db6c8388..e879a932438 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -164,7 +164,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) From 3b7cd457d0c8458f6a4df2854f75fd80c0338f93 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 15 Jun 2011 00:59:04 +0100 Subject: [PATCH 408/609] DRBD: Fix comparison always false warning due to long/long long compare Fix warnings of the following nature in the drbd header: In file included from drivers/block/drbd/drbd_bitmap.c:32: drivers/block/drbd/drbd_int.h: In function 'drbd_get_syncer_progress': drivers/block/drbd/drbd_int.h:2234: warning: comparison is always false due to limited range of data where mdev->rs_total (an unsigned long) is being compared to 1ULL << 32, which is always false on a 32-bit machine. 
Signed-off-by: David Howells Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ece2e4a991f..de42c7cf7ca 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2071,7 +2071,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, * Note: currently we don't support such large bitmaps on 32bit * arch anyways, but no harm done to be prepared for it here. */ - unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; + unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10; unsigned long left = *bits_left >> shift; unsigned long total = 1UL + (mdev->rs_total >> shift); unsigned long tmp = 1000UL - left * 1000UL/total; From f03c254961cce65ee2b21c4beccb6975b6f9d308 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 20 Jun 2011 22:21:19 +0200 Subject: [PATCH 409/609] drbd: allow ping-timeout of up to 30 seconds Allow up to 300 deci-seconds (30 seconds) to be configured for the "ping timeout". There may be setups where heavy congestion, huge buffers, and asymmetric bandwidth limitations may need a "huge" ping-timeout as work-around for "spurious connection loss" problems. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3627f760966..82db83410f0 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -62,7 +62,7 @@ /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 -#define DRBD_PING_TIMEO_MAX 100 +#define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 /* max number of write requests between write barriers */ From 4dbdae3ec9c75f570e2f128fd25b49ff1fa2a4de Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 21 Jun 2011 01:13:37 +0200 Subject: [PATCH 410/609] drbd: downgraded error printk to info Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d978e4d98a1..8c9d0348736 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1305,9 +1305,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_disk_str(mdev->state.disk)); if (!drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I am detaching my disk\n"); - else - dev_err(DEV, "Sending state for detaching disk failed\n"); + dev_info(DEV, "Notified peer that I am detaching my disk\n"); drbd_rs_cancel_all(mdev); @@ -1337,7 +1335,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, atomic_set(&mdev->rs_pending_cnt, 0); if (!drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I'm now diskless.\n"); + dev_info(DEV, "Notified peer that I'm now diskless.\n"); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. 
*/ put_ldev(mdev); From d942ae44537669418a7cbfd916531d30513dbca8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 31 May 2011 13:07:24 +0200 Subject: [PATCH 411/609] drbd: Fixes from the 8.3 development branch * commit 'ae57a0a': drbd: Only print sanitize state's warnings, if the state change happens drbd: we should write meta data updates with FLUSH FUA drbd: fix limit define, we support 1 PiByte now drbd: fix log message argument order drbd: Typo in user-visible message. drbd: Make "(rcv|snd)buf-size" and "ping-timeout" available for the proxy, too. drbd: Allow keywords to be used in multiple config sections. drbd: fix typos in comments. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +++--- drivers/block/drbd/drbd_state.c | 56 ++++++++++++++++++++++++--------- include/linux/drbd_limits.h | 2 +- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index de42c7cf7ca..1d71b3a3586 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -372,11 +372,11 @@ struct p_connection_features { u32 protocol_max; /* should be more than enough for future enhancements - * for now, feature_flags and the reserverd array shall be zero. + * for now, feature_flags and the reserved array shall be zero. 
*/ u32 _pad; - u64 reserverd[7]; + u64 reserved[7]; } __packed; struct p_barrier { @@ -914,7 +914,7 @@ struct drbd_conf { atomic_t ap_bio_cnt; /* Requests we need to complete */ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ - atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t unacked_cnt; /* Need to send replies for */ atomic_t local_cnt; /* Waiting for local completion */ /* Interval tree of pending local requests */ @@ -2153,7 +2153,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) /* disk state is stable as well. */ break; - /* no new io accepted during tansitional states */ + /* no new io accepted during transitional states */ case D_ATTACHING: case D_FAILED: case D_NEGOTIATING: @@ -2217,7 +2217,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev) /* we wait here * as long as the device is suspended * until the bitmap is no longer on the fly during connection - * handshake as long as we would exeed the max_buffer limit. + * handshake as long as we would exceed the max_buffer limit. * * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. 
*/ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8c9d0348736..2cf69b25f1e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,6 +37,15 @@ struct after_state_chg_work { struct completion *done; }; +enum sanitize_state_warnings { + NO_WARNING, + ABORTED_ONLINE_VERIFY, + ABORTED_RESYNC, + CONNECTION_LOST_NEGOTIATING, + IMPLICITLY_UPGRADED_DISK, + IMPLICITLY_UPGRADED_PDSK, +}; + static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); @@ -44,7 +53,7 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort); + enum sanitize_state_warnings *warn); static inline bool is_susp(union drbd_state s) { @@ -656,6 +665,21 @@ is_valid_transition(union drbd_state os, union drbd_state ns) return rv; } +static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +{ + static const char *msg_table[] = { + [NO_WARNING] = "", + [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", + [ABORTED_RESYNC] = "Resync aborted.", + [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", + [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", + [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", + }; + + if (warn != NO_WARNING) + dev_warn(DEV, "%s\n", msg_table[warn]); +} + /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition * @mdev: DRBD device. @@ -667,11 +691,14 @@ is_valid_transition(union drbd_state os, union drbd_state ns) * to D_UNKNOWN. 
This rule and many more along those lines are in this function. */ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort) + enum sanitize_state_warnings *warn) { enum drbd_fencing_p fp; enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + if (warn) + *warn = NO_WARNING; + fp = FP_DONT_CARE; if (get_ldev(mdev)) { rcu_read_lock(); @@ -695,10 +722,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* An implication of the disk states onto the connection state */ /* Abort resync if a disk fails/detaches */ if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = - ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? - "Online-verify" : "Resync"; + if (warn) + *warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? + ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; ns.conn = C_CONNECTED; } @@ -709,7 +735,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + if (warn) + *warn = CONNECTION_LOST_NEGOTIATING; ns.disk = D_DISKLESS; ns.pdsk = D_UNKNOWN; } @@ -791,16 +818,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = disk_max; if (ns.disk < disk_min) { - dev_warn(DEV, "Implicitly set disk from %s to %s\n", - drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_DISK; ns.disk = disk_min; } if (ns.pdsk > pdsk_max) ns.pdsk = pdsk_max; if (ns.pdsk < pdsk_min) { - dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", - drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_PDSK; ns.pdsk = pdsk_min; } @@ -875,12 +902,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, { union drbd_state os; enum drbd_state_rv rv = 
SS_SUCCESS; - const char *warn_sync_abort = NULL; + enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; os = drbd_read_state(mdev); - ns = sanitize_state(mdev, ns, &warn_sync_abort); + ns = sanitize_state(mdev, ns, &ssw); if (ns.i == os.i) return SS_NOTHING_TO_DO; @@ -909,8 +936,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } - if (warn_sync_abort) - dev_warn(DEV, "%s aborted.\n", warn_sync_abort); + print_sanitize_warnings(mdev, ssw); drbd_pr_state_change(mdev, os, ns, flags); diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 82db83410f0..f1046b13d9f 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -126,7 +126,7 @@ * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ #define DRBD_DISK_SIZE_MIN 0 -#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ #define DRBD_DISK_SIZE_SCALE 's' /* sectors */ From 2ffca4f3ee6c2d507c39689e5f569bcb0612d3ad Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 30 Jun 2011 15:43:06 +0200 Subject: [PATCH 412/609] drbd: Improve compatibility with drbd's older than 8.3.7 Regression introduced with 8.3.11 commit: drbd: Take a more conservative approach when deciding max_bio_size Never ever tell an older drbd, that we support more than 32KiB in a single data request (packet). 
Never believe an older drbd, that it supports more than 32KiB in a single data request (packet). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++- drivers/block/drbd/drbd_main.c | 6 ++++++ drivers/block/drbd/drbd_nl.c | 7 +++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1d71b3a3586..6035784f0de 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1284,7 +1284,8 @@ struct bm_extent { #endif #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ -#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ +#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* Header 80 only allows packets up to 32KiB data */ +#define DRBD_MAX_BIO_SIZE_P95 (1 << 17) /* Protocol 95 to 99 allows bios up to 128KiB */ extern int drbd_bm_init(struct drbd_conf *mdev); extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 72b1dfa4b65..448de7bf822 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1075,6 +1075,12 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p = drbd_prepare_command(mdev, sock); if (!p) return -EIO; + + if (mdev->tconn->agreed_pro_version <= 94) + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + else if (mdev->tconn->agreed_pro_version < 100) + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE_P95); + p->d_size = cpu_to_be64(d_size); p->u_size = cpu_to_be64(u_size); p->c_size = cpu_to_be64(trigger_reply ? 
0 : drbd_get_capacity(mdev->this_bdev)); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f92346296ae..838c3cd54cd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1062,10 +1062,13 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) BIOs for a single peer_request */ if (mdev->state.conn >= C_CONNECTED) { if (mdev->tconn->agreed_pro_version < 94) - peer = mdev->peer_max_bio_size; + peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ else if (mdev->tconn->agreed_pro_version == 94) peer = DRBD_MAX_SIZE_H80_PACKET; - else /* drbd 8.3.8 onwards */ + else if (mdev->tconn->agreed_pro_version < 100) + peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ + else peer = DRBD_MAX_BIO_SIZE; } From 9510b2411d365133ed6990d0bcb42be5876bdb66 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 1 Jul 2011 17:00:57 +0200 Subject: [PATCH 413/609] drbd: Fixed state transitions in case reading meta data fails Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_state.c | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 838c3cd54cd..97d1dab045d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1645,7 +1645,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) force_diskless_dec: put_ldev(mdev); force_diskless: - drbd_force_state(mdev, NS(disk, D_FAILED)); + drbd_force_state(mdev, NS(disk, D_DISKLESS)); drbd_md_sync(mdev); fail: conn_reconfig_done(mdev->tconn); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2cf69b25f1e..4c13a6f4f18 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -655,13 +655,6 @@ is_valid_transition(union 
drbd_state os, union drbd_state ns) if (ns.disk == D_FAILED && os.disk == D_DISKLESS) rv = SS_IS_DISKLESS; - /* if we are only D_ATTACHING yet, - * we can (and should) go directly to D_DISKLESS. */ - if (ns.disk == D_FAILED && os.disk == D_ATTACHING) { - printk("TODO: FIX ME\n"); - rv = SS_IS_DISKLESS; - } - return rv; } From 2fcb8f307f6014de9e771799d9ec0f050802c0ac Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 3 Jul 2011 11:41:08 +0200 Subject: [PATCH 414/609] drbd: Improve the "unexpected packet" error messages Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d55a3cb21c3..7218750d293 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3200,8 +3200,8 @@ static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info */ static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi) { - conn_warn(tconn, "Volume %u unknown; ignoring %s packet\n", - pi->vnr, cmdname(pi->cmd)); + conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n", + cmdname(pi->cmd), pi->vnr); return ignore_remaining_packet(tconn, pi); } @@ -4256,13 +4256,15 @@ static void drbdd(struct drbd_tconn *tconn) cmd = &drbd_cmd_handler[pi.cmd]; if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) { - conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); + conn_err(tconn, "Unexpected data packet %s (0x%04x)", + cmdname(pi.cmd), pi.cmd); goto err_out; } shs = cmd->pkt_size; if (pi.size > shs && !cmd->expect_payload) { - conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); + conn_err(tconn, "No payload expected %s l:%d\n", + cmdname(pi.cmd), pi.size); goto err_out; } @@ -4474,7 +4476,7 @@ static int drbd_do_features(struct 
drbd_tconn *tconn) if (pi.cmd != P_CONNECTION_FEATURES) { conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", - cmdname(pi.cmd), pi.cmd); + cmdname(pi.cmd), pi.cmd); return -1; } @@ -4583,7 +4585,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) if (pi.cmd != P_AUTH_CHALLENGE) { conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n", - cmdname(pi.cmd), pi.cmd); + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } @@ -4642,7 +4644,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) if (pi.cmd != P_AUTH_RESPONSE) { conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n", - cmdname(pi.cmd), pi.cmd); + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } @@ -5192,8 +5194,8 @@ int drbd_asender(struct drbd_thread *thi) goto reconnect; cmd = &asender_tbl[pi.cmd]; if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) { - conn_err(tconn, "unknown command %d on meta (l: %d)\n", - pi.cmd, pi.size); + conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n", + cmdname(pi.cmd), pi.cmd); goto disconnect; } expect = header_size + cmd->pkt_size; From cdfda633d235028e9b27381dedb65416409e8729 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 Jul 2011 15:38:59 +0200 Subject: [PATCH 415/609] drbd: detach from frozen backing device * drbd-8.3: documentation: Documented detach's --force and disk's --disk-timeout drbd: Implemented the disk-timeout option drbd: Force flag for the detach operation drbd: Allow new IOs while the local disk is in FAILED state drbd: Bitmap IO functions can not return prematurely if the disk breaks drbd: Added a kref to bm_aio_ctx drbd: Hold a reference to ldev while doing meta-data IO drbd: Keep a reference to the bio until the completion handler finished drbd: Implemented wait_until_done_or_disk_failure() drbd: Replaced md_io_mutex by an atomic: md_io_in_use drbd: moved md_io into mdev drbd: Immediately allow completion of IOs, that wait for IO completions on a failed disk drbd: Keep a reference 
to barrier acked requests Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 75 +++++++++++++----- drivers/block/drbd/drbd_bitmap.c | 117 +++++++++++++++++++++-------- drivers/block/drbd/drbd_int.h | 12 ++- drivers/block/drbd/drbd_main.c | 77 ++++++++++++++++--- drivers/block/drbd/drbd_nl.c | 28 ++++++- drivers/block/drbd/drbd_receiver.c | 2 - drivers/block/drbd/drbd_req.c | 52 +++++++++---- drivers/block/drbd/drbd_req.h | 19 +++-- drivers/block/drbd/drbd_state.c | 7 ++ drivers/block/drbd/drbd_worker.c | 9 ++- include/linux/drbd_genl.h | 9 ++- include/linux/drbd_limits.h | 6 ++ 12 files changed, 322 insertions(+), 91 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index aeb483daea0..58b5b61628f 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -114,18 +114,44 @@ struct drbd_atodb_wait { static int w_al_write_transaction(struct drbd_work *, int); +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ + int r; + + wait_event(mdev->misc_wait, + (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || + mdev->state.disk <= D_FAILED); + + return r ? 
NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->md_io_in_use)) + wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) +{ + enum drbd_disk_state ds = mdev->state.disk; + return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} + +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +{ + wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); +} + static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) { struct bio *bio; - struct drbd_md_io md_io; int err; - md_io.mdev = mdev; - init_completion(&md_io.event); - md_io.error = 0; + mdev->md_io.done = 0; + mdev->md_io.error = -ENODEV; if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) rw |= REQ_FUA | REQ_FLUSH; @@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, err = -EIO; if (bio_add_page(bio, page, size, 0) != size) goto out; - bio->bi_private = &md_io; + bio->bi_private = &mdev->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + err = -ENODEV; + goto out; + } + + bio_get(bio); /* one bio_put() is in the completion handler */ + atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ if (drbd_insert_fault(mdev, (rw & WRITE) ? 
DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_for_completion(&md_io.event); + wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) - err = md_io.error; + err = mdev->md_io.error; out: bio_put(bio); @@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int err; struct page *iop = mdev->md_io_page; - D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); BUG_ON(!bdev->md_bdev); @@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } - mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + if (!buffer) { + dev_err(DEV, "disk failed while waiting for md_io buffer\n"); + aw->err = -EIO; + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } memset(buffer, 0, sizeof(*buffer)); buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); @@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) mdev->al_tr_number++; } - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); @@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* lock out all other meta data io for now, * and make sure the page is mapped. */ - mutex_lock(&mdev->md_io_mutex); - b = page_address(mdev->md_io_page); + b = drbd_md_get_buffer(mdev); + if (!b) + return 0; /* Always use the full ringbuffer space for now. 
* possible optimization: read in all of it, @@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* IO error */ if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!found_valid) { if (found_initialized != mx) dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 1; } @@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!expect(rv != 0)) goto cancel; if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -643,7 +684,7 @@ cancel: mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", transactions, active_extents); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 52c48143b22..706e5220dd4 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) struct bm_aio_ctx { struct drbd_conf *mdev; atomic_t in_flight; - struct completion done; + unsigned int done; unsigned flags; #define BM_AIO_COPY_PAGES 1 #define BM_AIO_WRITE_HINTED 2 int error; + struct kref kref; }; +static void bm_aio_ctx_destroy(struct kref *kref) +{ + struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + + put_ldev(ctx->mdev); + kfree(ctx); +} + /* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { @@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error) bio_put(bio); - if (atomic_dec_and_test(&ctx->in_flight)) - complete(&ctx->done); + if 
(atomic_dec_and_test(&ctx->in_flight)) { + ctx->done = 1; + wake_up(&mdev->misc_wait); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + } } static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - struct bio *bio = bio_alloc_drbd(GFP_KERNEL); + struct bio *bio = bio_alloc_drbd(GFP_NOIO); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; @@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must */ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { - struct bm_aio_ctx ctx = { - .mdev = mdev, - .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = flags, - }; + struct bm_aio_ctx *ctx; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; unsigned long now; @@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * For lazy writeout, we don't care for ongoing changes to the bitmap, * as we submit copies of pages anyways. 
*/ - if (!ctx.flags) + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = flags, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + err = -ENODEV; + goto out; + } + + if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); num_pages = b->bm_number_of_pages; @@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w continue; } } - atomic_inc(&ctx.in_flight); - bm_page_io_async(&ctx, i, rw); + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i, rw); ++count; cond_resched(); } /* - * We initialize ctx.in_flight to one to make sure bm_async_io_complete - * will not complete() early, and decrement / test it here. If there + * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * will not set ctx->done early, and decrement / test it here. If there * are still some bios in flight, we need to wait for them here. + * If all IO is done already (or nothing had been submitted), there is + * no need to wait. Still, we need to put the kref associated with the + * "in_flight reached zero, all done" event. */ - if (!atomic_dec_and_test(&ctx.in_flight)) - wait_for_completion(&ctx.done); + if (!atomic_dec_and_test(&ctx->in_flight)) + wait_until_done_or_disk_failure(mdev, &ctx->done); + else + kref_put(&ctx->kref, &bm_aio_ctx_destroy); /* summary for global bitmap IO */ if (flags == 0) dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + rw == WRITE ? 
"WRITE" : "READ", + count, jiffies - now); - if (ctx.error) { + if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, true); - err = -EIO; /* ctx.error ? */ + err = -EIO; /* ctx->error ? */ } + if (atomic_read(&ctx->in_flight)) + err = -EIO; /* Disk failed during IO... */ + now = jiffies; if (rw == WRITE) { drbd_md_flush(mdev); @@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); +out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - struct bm_aio_ctx ctx = { - .mdev = mdev, - .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = BM_AIO_COPY_PAGES, - }; + struct bm_aio_ctx *ctx; + int err; if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); return 0; } - bm_page_io_async(&ctx, idx, WRITE_SYNC); - wait_for_completion(&ctx.done); + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; - if (ctx.error) + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = BM_AIO_COPY_PAGES, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + err = -ENODEV; + goto out; + } + + bm_page_io_async(ctx, idx, WRITE_SYNC); + wait_until_done_or_disk_failure(mdev, &ctx->done); + + if (ctx->error) drbd_chk_io_error(mdev, 1, true); /* that should force detach, so the in memory bitmap will be * gone in a moment as well. 
*/ mdev->bm_writ_cnt++; - return ctx.error; + err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; + out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + return err; } /* NOTE diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6035784f0de..4e582058a7c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -780,8 +780,7 @@ struct drbd_backing_dev { }; struct drbd_md_io { - struct drbd_conf *mdev; - struct completion event; + unsigned int done; int error; }; @@ -852,6 +851,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct list_head barrier_acked_requests; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ @@ -978,7 +978,8 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct mutex md_io_mutex; /* protects the md_io_buffer */ + struct drbd_md_io md_io; + atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ @@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? 
*/ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) @@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) case D_OUTDATED: case D_CONSISTENT: case D_UP_TO_DATE: + case D_FAILED: /* disk state is stable as well. */ break; /* no new io accepted during transitional states */ case D_ATTACHING: - case D_FAILED: case D_NEGOTIATING: case D_UNKNOWN: case D_MASK: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 448de7bf822..15384986e4a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn) tconn->oldest_tle = b; tconn->newest_tle = b; INIT_LIST_HEAD(&tconn->out_of_sequence_requests); + INIT_LIST_HEAD(&tconn->barrier_acked_requests); return 1; } @@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, These have been list_move'd to the out_of_sequence_requests list in _req_mod(, BARRIER_ACKED) above. */ - list_del_init(&b->requests); + list_splice_init(&b->requests, &tconn->barrier_acked_requests); mdev = b->w.mdev; nob = b->next; @@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) b = tmp; list_splice(&carry_reads, &b->requests); } -} + /* Actions operating on the disk state, also want to work on + requests that got barrier acked. 
*/ + switch (what) { + case FAIL_FROZEN_DISK_IO: + case RESTART_FROZEN_DISK_IO: + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); + } + case CONNECTION_LOST_WHILE_PENDING: + case RESEND: + break; + default: + conn_err(tconn, "what = %d in _tl_restart()\n", what); + } +} /** * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL @@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) spin_unlock_irq(&tconn->req_lock); } +/** + * tl_apply() - Applies an event to all requests for a certain mdev in the TL + * @mdev: DRBD device. + * @what: The action/event to perform with all request objects + * + * @what might ony be ABORT_DISK_IO. + */ +void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_tl_epoch *b; + struct list_head *le, *tle; + struct drbd_request *req; + + D_ASSERT(what == ABORT_DISK_IO); + + spin_lock_irq(&tconn->req_lock); + b = tconn->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + b = b->next; + } + + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + + spin_unlock_irq(&tconn->req_lock); +} + static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; @@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); atomic_set(&mdev->ap_in_flight, 0); + atomic_set(&mdev->md_io_in_use, 0); - mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->own_state_mutex); mdev->state_mutex = &mdev->own_state_mutex; @@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref) 
struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); struct drbd_tconn *tconn = mdev->tconn; + del_timer_sync(&mdev->request_timer); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev) * since we updated it on metadata. */ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); +out: put_ldev(mdev); } @@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is @@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; err: - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); + out: put_ldev(mdev); return rv; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 97d1dab045d..bf8d0b07762 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) synchronize_rcu(); kfree(old_disk_conf); kfree(old_plan); + mod_timer(&mdev->request_timer, jiffies + HZ); goto success; fail_unlock: @@ 
-1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (rv < SS_SUCCESS) goto force_diskless_dec; + mod_timer(&mdev->request_timer, jiffies + HZ); + if (mdev->state.role == R_PRIMARY) mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; else @@ -1667,10 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } -static int adm_detach(struct drbd_conf *mdev) +static int adm_detach(struct drbd_conf *mdev, int force) { enum drbd_state_rv retcode; int ret; + + if (force) { + drbd_force_state(mdev, NS(disk, D_FAILED)); + retcode = SS_SUCCESS; + goto out; + } + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); /* D_FAILED will transition to DISKLESS. */ @@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev) retcode = SS_NOTHING_TO_DO; if (ret) retcode = ERR_INTR; +out: return retcode; } @@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev) int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; + struct detach_parms parms = { }; + int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = adm_detach(adm_ctx.mdev); + if (info->attrs[DRBD_NLA_DETACH_PARMS]) { + err = detach_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + retcode = adm_detach(adm_ctx.mdev, parms.force_detach); out: drbd_adm_finish(info, retcode); return 0; @@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = adm_detach(mdev); + retcode = adm_detach(mdev, 0); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to detach"); goto out; diff --git 
a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7218750d293..3a7e54b8f41 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - del_timer(&mdev->request_timer); - del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c4e4553f5c2..8fa51cda3b7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; - /* only WRITES may end up here without a master bio (on barrier ack) */ - int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. */ - if (s & RQ_LOCAL_PENDING) + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) return; if (req->i.waiting) { /* Retry all conflicting peer requests. 
*/ @@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) req->master_bio = NULL; } + if (s & RQ_LOCAL_PENDING) + return; + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, * or protocol C P_WRITE_ACK, @@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case COMPLETED_OK: - if (bio_data_dir(req->master_bio) == WRITE) + if (req->rq_state & RQ_WRITE) mdev->writ_cnt += req->i.size >> 9; else mdev->read_cnt += req->i.size >> 9; @@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; + case ABORT_DISK_IO: + req->rq_state |= RQ_LOCAL_ABORTED; + if (req->rq_state & RQ_WRITE) + _req_may_be_done_not_susp(req, m); + else + goto goto_queue_for_net_read; + break; + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); put_ldev(mdev); + goto_queue_for_net_read: + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { @@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data) struct drbd_request *req; /* oldest request */ struct list_head *le; struct net_conf *nc; - unsigned long et; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */ rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + ent = nc ? 
nc->timeout * HZ/10 * nc->ko_count : 0; + + if (get_ldev(mdev)) { + dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; + put_ldev(mdev); + } rcu_read_unlock(); - if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) + et = min_not_zero(dt, ent); + + if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) return; /* Recurring timer stopped */ spin_lock_irq(&tconn->req_lock); @@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data) le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (time_is_before_eq_jiffies(req->start_time + et)) { - if (req->rq_state & RQ_NET_PENDING) { + if (ent && req->rq_state & RQ_NET_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + ent)) { dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); - } else { - dev_warn(DEV, "Local backing block device frozen?\n"); - mod_timer(&mdev->request_timer, jiffies + et); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + } + } + if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); } - } else { - mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&tconn->req_lock); + mod_timer(&mdev->request_timer, req->start_time + et); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 5135c95fbf8..f6aff150add 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -106,6 +106,7 @@ enum drbd_req_event { READ_COMPLETED_WITH_ERROR, READ_AHEAD_COMPLETED_WITH_ERROR, WRITE_COMPLETED_WITH_ERROR, + ABORT_DISK_IO, COMPLETED_OK, RESEND, FAIL_FROZEN_DISK_IO, @@ -119,18 +120,21 @@ enum drbd_req_event { * same time, so we should hold the request lock anyways. 
*/ enum drbd_req_state_bits { - /* 210 - * 000: no local possible - * 001: to be submitted + /* 3210 + * 0000: no local possible + * 0001: to be submitted * UNUSED, we could map: 011: submitted, completion still pending - * 110: completed ok - * 010: completed with error + * 0110: completed ok + * 0010: completed with error + * 1001: Aborted (before completion) + * 1x10: Aborted and completed -> free */ __RQ_LOCAL_PENDING, __RQ_LOCAL_COMPLETED, __RQ_LOCAL_OK, + __RQ_LOCAL_ABORTED, - /* 76543 + /* 87654 * 00000: no network possible * 00001: to be send * 00011: to be send, on worker queue @@ -209,8 +213,9 @@ enum drbd_req_state_bits { #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4c13a6f4f18..f51cefdbeff 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -29,6 +29,9 @@ #include "drbd_int.h" #include "drbd_req.h" +/* in drbd_main.c */ +extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what); + struct after_state_chg_work { struct drbd_work w; union drbd_state os; @@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, rcu_read_unlock(); was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_apply(mdev, ABORT_DISK_IO); + /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, * and that may only happen after our put_ldev below. 
*/ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6410c55831e..dac8d9bc4be 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -67,11 +67,18 @@ rwlock_t global_state_lock; void drbd_md_io_complete(struct bio *bio, int error) { struct drbd_md_io *md_io; + struct drbd_conf *mdev; md_io = (struct drbd_md_io *)bio->bi_private; + mdev = container_of(md_io, struct drbd_conf, md_io); + md_io->error = error; - complete(&md_io->event); + md_io->done = 1; + wake_up(&mdev->misc_wait); + bio_put(bio); + drbd_md_put_buffer(mdev); + put_ldev(mdev); } /* reads on behalf of the partner, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index e879a932438..2e6cefefe5e 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) +GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) +) + /* * Notifications and commands (genlmsghdr->cmd) */ @@ -335,7 +340,9 @@ GENL_op( ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 20, 
GENL_doit(drbd_adm_invalidate_peer), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index f1046b13d9f..ddd332db2a5 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -50,6 +50,12 @@ #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_SCALE '1' + /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 From d5d7ebd42250620a6da2a8f6943c024391433488 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 5 Jul 2011 20:59:26 +0200 Subject: [PATCH 416/609] drbd: on attach, enforce clean meta data Detection of unclean shutdown has moved into user space. The kernel code will, whenever it updates the meta data, mark it as "unclean", and will refuse to attach to such unclean meta data. "drbdadm up" now schedules "drbdmeta apply-al", which will apply the activity log to the bitmap, and/or reinitialize it, if necessary, as well as set a "clean" indicator flag. This moves a bit of code out of kernel space. As a side effect, it also prevents some 8.3 module from accidentally ignoring the 8.4 style activity log, if someone should downgrade, whether on purpose, or accidentally because he changed kernel versions without providing an 8.4 for the new kernel, and the new kernel comes with in-tree 8.3.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 259 ------------------------------- drivers/block/drbd/drbd_int.h | 6 - drivers/block/drbd/drbd_main.c | 26 ++-- drivers/block/drbd/drbd_nl.c | 19 +-- drivers/block/drbd/drbd_state.c | 1 + include/linux/drbd.h | 10 +- 6 files changed, 28 insertions(+), 293 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 58b5b61628f..da8ffd54fc1 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -462,265 +462,6 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } -/* FIXME - * reading of the activity log, - * and potentially dirtying of the affected bitmap regions, - * should be done from userland only. - * DRBD would simply always attach with an empty activity log, - * and refuse to attach to something that looks like a crashed primary. - */ - -/** - * drbd_al_read_tr() - Read a single transaction from the on disk activity log - * @mdev: DRBD device. - * @bdev: Block device to read form. - * @b: pointer to an al_transaction. - * @index: On disk slot of the transaction to read. - * - * Returns -1 on IO error, 0 on checksum error and 1 upon success. - */ -static int drbd_al_read_tr(struct drbd_conf *mdev, - struct drbd_backing_dev *bdev, - int index) -{ - struct al_transaction_on_disk *b = page_address(mdev->md_io_page); - sector_t sector; - u32 crc; - - sector = bdev->md.md_offset - + bdev->md.al_offset - + index * (MD_BLOCK_SIZE>>9); - - /* Dont process error normally, - * as this is done before disk is attached! 
*/ - if (drbd_md_sync_page_io(mdev, bdev, sector, READ)) - return -1; - - if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) - return 0; - - if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) - return 0; - - if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) - return 0; - - if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) - return 0; - - crc = be32_to_cpu(b->crc32c); - b->crc32c = 0; - if (!expect(crc == crc32c(0, b, 4096))) - return 0; - - return 1; -} - -/** - * drbd_al_read_log() - Restores the activity log from its on disk representation. - * @mdev: DRBD device. - * @bdev: Block device to read form. - * - * Returns 1 on success, returns 0 when reading the log failed due to IO errors. - */ -int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) -{ - struct al_transaction_on_disk *b; - int i; - int rv; - int mx; - int active_extents = 0; - int transactions = 0; - int found_valid = 0; - int found_initialized = 0; - int from = 0; - int to = 0; - u32 from_tnr = 0; - u32 to_tnr = 0; - u32 cnr; - - /* Note that this is expected to be called with a newly created, - * clean and all unused activity log of the "expected size". - */ - - /* lock out all other meta data io for now, - * and make sure the page is mapped. - */ - b = drbd_md_get_buffer(mdev); - if (!b) - return 0; - - /* Always use the full ringbuffer space for now. - * possible optimization: read in all of it, - * then scan the in-memory pages. 
*/ - - mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); - - /* Find the valid transaction in the log */ - for (i = 0; i < mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, i); - /* invalid data in that block */ - if (rv == 0) - continue; - if (be16_to_cpu(b->transaction_type) == AL_TR_INITIALIZED) { - ++found_initialized; - continue; - } - - /* IO error */ - if (rv == -1) { - drbd_md_put_buffer(mdev); - return 0; - } - - cnr = be32_to_cpu(b->tr_number); - if (++found_valid == 1) { - from = i; - to = i; - from_tnr = cnr; - to_tnr = cnr; - continue; - } - - D_ASSERT(cnr != to_tnr); - D_ASSERT(cnr != from_tnr); - if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx); - from = i; - from_tnr = cnr; - } - if ((int)cnr - (int)to_tnr > 0) { - D_ASSERT(cnr - to_tnr == i - to); - to = i; - to_tnr = cnr; - } - } - - if (!found_valid) { - if (found_initialized != mx) - dev_warn(DEV, "No usable activity log found.\n"); - drbd_md_put_buffer(mdev); - return 1; - } - - /* Read the valid transactions. - * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ - i = from; - while (1) { - struct lc_element *e; - unsigned j, n, slot, extent_nr; - - rv = drbd_al_read_tr(mdev, bdev, i); - if (!expect(rv != 0)) - goto cancel; - if (rv == -1) { - drbd_md_put_buffer(mdev); - return 0; - } - - /* deal with different transaction types. - * not yet implemented */ - if (!expect(b->transaction_type == 0)) - goto cancel; - - /* on the fly re-create/resize activity log? - * will be a special transaction type flag. */ - if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) - goto cancel; - if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) - goto cancel; - - /* We are the only user of the activity log right now, - * don't actually need to take that lock. */ - spin_lock_irq(&mdev->al_lock); - - /* first, apply the context, ... 
*/ - for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); - j < AL_CONTEXT_PER_TRANSACTION && - slot < mdev->act_log->nr_elements; j++, slot++) { - extent_nr = be32_to_cpu(b->context[j]); - e = lc_element_by_index(mdev->act_log, slot); - if (e->lc_number != extent_nr) { - if (extent_nr != LC_FREE) - active_extents++; - else - active_extents--; - } - lc_set(mdev->act_log, extent_nr, slot); - } - - /* ... then apply the updates, - * which override the context information. - * drbd_al_read_tr already did the rangecheck - * on n <= AL_UPDATES_PER_TRANSACTION */ - n = be16_to_cpu(b->n_updates); - for (j = 0; j < n; j++) { - slot = be16_to_cpu(b->update_slot_nr[j]); - extent_nr = be32_to_cpu(b->update_extent_nr[j]); - if (!expect(slot < mdev->act_log->nr_elements)) - break; - e = lc_element_by_index(mdev->act_log, slot); - if (e->lc_number != extent_nr) { - if (extent_nr != LC_FREE) - active_extents++; - else - active_extents--; - } - lc_set(mdev->act_log, extent_nr, slot); - } - spin_unlock_irq(&mdev->al_lock); - - transactions++; - -cancel: - if (i == to) - break; - i++; - if (i >= mx) - i = 0; - } - - mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); - - /* ok, we are done with it */ - drbd_md_put_buffer(mdev); - - dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", - transactions, active_extents); - - return 1; -} - -/** - * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents - * @mdev: DRBD device. 
- */ -void drbd_al_apply_to_bm(struct drbd_conf *mdev) -{ - unsigned int enr; - unsigned long add = 0; - char ppb[10]; - int i, tmp; - - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - - for (i = 0; i < mdev->act_log->nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - tmp = drbd_bm_ALe_set_all(mdev, enr); - dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr); - add += tmp; - } - - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - - dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", - ppsize(ppb, Bit2KB(add))); -} - static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) { int rv; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4e582058a7c..9d0d6d0fb82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -164,10 +164,6 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { /* usual integer division */ #define div_floor(A, B) ((A)/(B)) -/* drbd_meta-data.c (still in drbd_main.c) */ -/* 4th incarnation of the disk layout. 
*/ -#define DRBD_MD_MAGIC (DRBD_MAGIC+4) - extern struct ratelimit_state drbd_ratelimit_state; extern struct idr minors; /* RCU, updates: genl_lock() */ extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */ @@ -1560,7 +1556,6 @@ extern void drbd_rs_cancel_all(struct drbd_conf *mdev); extern int drbd_rs_del_all(struct drbd_conf *mdev); extern void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size); -extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); @@ -1570,7 +1565,6 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) -extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); /* drbd_nl.c */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 15384986e4a..f1d696ab6e8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2932,7 +2932,7 @@ void drbd_md_sync(struct drbd_conf *mdev) for (i = UI_CURRENT; i < UI_SIZE; i++) buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); buffer->flags = cpu_to_be32(mdev->ldev->md.flags); - buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); + buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN); buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); @@ -2967,11 +2967,12 @@ out: * @bdev: Device from which the meta data should be read in. * * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case - * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. 
+ * something goes wrong. */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { struct meta_data_on_disk *buffer; + u32 magic, flags; int i, rv = NO_ERROR; if (!get_ldev_if_state(mdev, D_ATTACHING)) @@ -2989,8 +2990,20 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } - if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) { - dev_err(DEV, "Error while reading metadata, magic not found.\n"); + magic = be32_to_cpu(buffer->magic); + flags = be32_to_cpu(buffer->flags); + if (magic == DRBD_MD_MAGIC_84_UNCLEAN || + (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) { + /* btw: that's Activity Log clean, not "all" clean. */ + dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n"); + rv = ERR_MD_UNCLEAN; + goto err; + } + if (magic != DRBD_MD_MAGIC_08) { + if (magic == DRBD_MD_MAGIC_07) + dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); + else + dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); rv = ERR_MD_INVALID; goto err; } @@ -3035,11 +3048,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - /* This blocks wants to be get removed... 
*/ - bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents); - if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) - bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; - err: drbd_md_put_buffer(mdev); out: diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bf8d0b07762..b39f5dc0f47 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1267,7 +1267,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) union drbd_state ns, os; enum drbd_state_rv rv; struct net_conf *nc; - int cp_discovered = 0; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1477,11 +1476,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto force_diskless_dec; } - if (!drbd_al_read_log(mdev, nbc)) { - retcode = ERR_IO_MD_DISK; - goto force_diskless_dec; - } - /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ if (new_disk_conf->md_flushes) @@ -1511,10 +1505,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) clear_bit(CRASHED_PRIMARY, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) { + !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) set_bit(CRASHED_PRIMARY, &mdev->flags); - cp_discovered = 1; - } mdev->send_cnt = 0; mdev->recv_cnt = 0; @@ -1566,15 +1558,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - if (cp_discovered) { - drbd_al_apply_to_bm(mdev); - if (drbd_bitmap_io(mdev, &drbd_bm_write, - "crashed primary apply AL", BM_LOCKED_MASK)) { - retcode = ERR_IO_MD_DISK; - goto force_diskless_dec; - } - } - if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) drbd_suspend_al(mdev); /* IO is still suspended here... 
*/ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index f51cefdbeff..c4d0d96d790 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1017,6 +1017,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); + mdf &= ~MDF_AL_CLEAN; if (test_bit(CRASHED_PRIMARY, &mdev->flags)) mdf |= MDF_CRASHED_PRIMARY; if (mdev->state.role == R_PRIMARY || diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 161cd414b03..1e9f754b66a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -162,6 +162,7 @@ enum drbd_ret_code { ERR_INVALID_REQUEST = 162, ERR_NEED_APV_100 = 163, ERR_NEED_ALLOW_TWO_PRI = 164, + ERR_MD_UNCLEAN = 165, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -321,7 +322,8 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_FULL_SYNC (1 << 3) #define MDF_WAS_UP_TO_DATE (1 << 4) #define MDF_PEER_OUT_DATED (1 << 5) -#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_AL_CLEAN (1 << 7) enum drbd_uuid_index { UI_CURRENT, @@ -341,10 +343,16 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) +/* magic numbers used in meta data and network packets */ #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a #define DRBD_MAGIC_100 0x8620ec20 +#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3) +#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4) +#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5) + + /* how I came up with this magic? * base64 decode "actlog==" ;) */ #define DRBD_AL_MAGIC 0x69cb65a2 From 369bea63711ae0595f6ef8c3c02bbf8a17ec608a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 6 Jul 2011 23:04:44 +0200 Subject: [PATCH 417/609] drbd: Fixed removal of volumes/devices from connected resources When removing a volume/device we need to switch the connection status of the peer back into WFReportParams. 
Before this fix it was left in Connected state. That means that the peer device continued to inform us about state changes, etc... But we deleted that minor -> protocol error. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 ++ drivers/block/drbd/drbd_receiver.c | 1 + drivers/block/drbd/drbd_state.c | 6 ++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b39f5dc0f47..4e1beb7ee83 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3061,6 +3061,8 @@ static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) * we may want to delete a minor from a live replication group. */ mdev->state.role == R_SECONDARY) { + _drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS), + CS_VERBOSE + CS_WAIT_COMPLETE); idr_remove(&mdev->tconn->volumes, mdev->vnr); idr_remove(&minors, mdev_to_minor(mdev)); del_gendisk(mdev->vdisk); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3a7e54b8f41..d4e677c9c76 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3628,6 +3628,7 @@ static union drbd_state convert_state(union drbd_state ps) union drbd_state ms; static enum drbd_conns c_tab[] = { + [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS, [C_CONNECTED] = C_CONNECTED, [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c4d0d96d790..633b52c7244 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -201,7 +201,8 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || - (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); + (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) || + 
(os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS); } static union drbd_state @@ -1202,7 +1203,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } /* Do not change the order of the if above and the two below... */ - if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ + if (os.pdsk == D_DISKLESS && + ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */ drbd_send_uuids(mdev); drbd_send_state(mdev); } From a67e1d9e8cca5e05f03a6f45c3220c6fe3adfa95 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 8 Jul 2011 14:47:45 +0200 Subject: [PATCH 418/609] drbd: Eliminated the "notified peer" messages Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 633b52c7244..c4dd667b20e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1333,9 +1333,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, "ASSERT FAILED: disk is %s during detach\n", drbd_disk_str(mdev->state.disk)); - if (!drbd_send_state(mdev)) - dev_info(DEV, "Notified peer that I am detaching my disk\n"); - + drbd_send_state(mdev); drbd_rs_cancel_all(mdev); /* In case we want to get something to stable storage still, @@ -1363,8 +1361,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->rs_failed = 0; atomic_set(&mdev->rs_pending_cnt, 0); - if (!drbd_send_state(mdev)) - dev_info(DEV, "Notified peer that I'm now diskless.\n"); + drbd_send_state(mdev); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. 
*/ put_ldev(mdev); From e0e1665381a519fd1f588948b1c48b5e609e336d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 11 Jul 2011 17:04:23 +0200 Subject: [PATCH 419/609] drbd: Correctly handle resources without volumes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 3 ++- drivers/block/drbd/drbd_state.c | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4e1beb7ee83..862d69e9f1a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1095,7 +1095,8 @@ static void conn_reconfig_done(struct drbd_tconn *tconn) { bool stop_threads; spin_lock_irq(&tconn->req_lock); - stop_threads = conn_all_vols_unconf(tconn); + stop_threads = conn_all_vols_unconf(tconn) && + tconn->cstate == C_STANDALONE; spin_unlock_irq(&tconn->req_lock); if (stop_threads) { /* asender is implicitly stopped by receiver diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c4dd667b20e..eafc19532e1 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1491,9 +1491,15 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf) { enum chg_state_flags flags = ~0; - union drbd_dev_state os, cs = {}; /* old_state, common_state */ struct drbd_conf *mdev; int vnr, first_vol = 1; + union drbd_dev_state os, cs = { + { .role = R_SECONDARY, + .peer = R_UNKNOWN, + .conn = tconn->cstate, + .disk = D_DISKLESS, + .pdsk = D_UNKNOWN, + } }; rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { @@ -1574,10 +1580,17 @@ void conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) { - union drbd_state ns, os, ns_max = { }; + union drbd_state ns, os, ns_max 
= { + { .role = R_SECONDARY, + .peer = R_UNKNOWN, + .conn = val.conn, + .disk = D_DISKLESS, + .pdsk = D_UNKNOWN + } }; union drbd_state ns_min = { { .role = R_MASK, .peer = R_MASK, + .conn = val.conn, .disk = D_MASK, .pdsk = D_MASK } }; From 367d675da8fa0041e0f336ecf940992837cc4c50 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 11 Jul 2011 23:49:55 +0200 Subject: [PATCH 420/609] drbd: report net config even for resources without a single volume Currently it is legal (though unusual) to create and connect a resource, before adding in all necessary volumes. We should include the network configuration details, even if we don't have a single volume (yet). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 862d69e9f1a..c27b428b9a5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2731,14 +2731,18 @@ next_tconn: goto out; if (!mdev) { - /* this is a tconn without a single volume */ + /* This is a tconn without a single volume. + * Suprisingly enough, it may have a network + * configuration. 
*/ + struct net_conf *nc; dh->minor = -1U; dh->ret_code = NO_ERROR; if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED)) - genlmsg_cancel(skb, dh); - else - genlmsg_end(skb, dh); - goto out; + goto cancel; + nc = rcu_dereference(tconn->net_conf); + if (nc && net_conf_to_skb(skb, nc, 1) != 0) + goto cancel; + goto done; } D_ASSERT(mdev->vnr == volume); @@ -2748,9 +2752,11 @@ next_tconn: dh->ret_code = NO_ERROR; if (nla_put_status_info(skb, mdev, NULL)) { +cancel: genlmsg_cancel(skb, dh); goto out; } +done: genlmsg_end(skb, dh); } From 65d94927e036cd8e8e1406fa7fc387b4ae730159 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Jul 2011 10:24:51 +0200 Subject: [PATCH 421/609] drbd: Changed some defaults * Enabled the resync controller, with a fill target of 50KiB. That gives reasonable resync speeds without tuning. A much better default than the 250KiB/s fixed. * Enable bitmap compression. It is safe to use, and most people have more CPU power than network bandwidth. * ko-count of 7: Abort a connection if the peer fails to process a write request within 42 seconds. * al-extents of 1237: ~5 GiB seems to be a much more sane default these days. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index ddd332db2a5..defdebfecb7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX 256 +#define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_VOLUME_MAX 65535 @@ -99,7 +99,7 @@ * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 -#define DRBD_KO_COUNT_DEF 0 +#define DRBD_KO_COUNT_DEF 7 /* } */ /* syncer { */ @@ -117,7 +117,7 @@ * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 -#define DRBD_AL_EXTENTS_DEF 127 +#define DRBD_AL_EXTENTS_DEF 1237 #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -151,7 +151,7 @@ #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 -#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ +#define DRBD_C_PLAN_AHEAD_DEF 20 #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 @@ -159,7 +159,7 @@ #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ -#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ +#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ #define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ #define DRBD_C_MAX_RATE_MAX (4 << 20) @@ -167,7 +167,7 @@ #define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ #define DRBD_C_MIN_RATE_MAX (4 << 20) -#define DRBD_C_MIN_RATE_DEF 4096 +#define DRBD_C_MIN_RATE_DEF 250 #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ @@ -187,6 +187,6 @@ #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define 
DRBD_ALWAYS_ASBP_DEF 0 -#define DRBD_USE_RLE_DEF 0 +#define DRBD_USE_RLE_DEF 1 #endif From 32bdb64038ba3127245912dae2cc8a450bb1d705 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 May 2011 18:26:20 +0200 Subject: [PATCH 422/609] drbd: Define scale factors in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index defdebfecb7..cd3565cfed4 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -18,29 +18,35 @@ #define DRBD_MINOR_COUNT_MIN 1 #define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_MINOR_COUNT_SCALE '1' #define DRBD_VOLUME_MAX 65535 #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 +#define DRBD_DIALOG_REFRESH_SCALE '1' /* valid port number */ #define DRBD_PORT_MIN 1 #define DRBD_PORT_MAX 0xffff +#define DRBD_PORT_SCALE '1' /* startup { */ /* if you want more than 3.4 days, disable */ #define DRBD_WFC_TIMEOUT_MIN 0 #define DRBD_WFC_TIMEOUT_MAX 300000 #define DRBD_WFC_TIMEOUT_DEF 0 +#define DRBD_WFC_TIMEOUT_SCALE '1' #define DRBD_DEGR_WFC_TIMEOUT_MIN 0 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0 +#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' /* }*/ /* net { */ @@ -49,6 +55,7 @@ #define DRBD_TIMEOUT_MIN 1 #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ +#define DRBD_TIMEOUT_SCALE '1' /* If backing disk takes longer than disk_timeout, mark the disk as failed */ #define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ @@ -60,46 +67,55 @@ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_DEF 10 /* seconds */ 
+#define DRBD_CONNECT_INT_SCALE '1' /* keep-alive probes when idle */ #define DRBD_PING_INT_MIN 1 #define DRBD_PING_INT_MAX 120 #define DRBD_PING_INT_DEF 10 +#define DRBD_PING_INT_SCALE '1' /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 #define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 +#define DRBD_PING_TIMEO_SCALE '1' /* max number of write requests between write barriers */ #define DRBD_MAX_EPOCH_SIZE_MIN 1 #define DRBD_MAX_EPOCH_SIZE_MAX 20000 #define DRBD_MAX_EPOCH_SIZE_DEF 2048 +#define DRBD_MAX_EPOCH_SIZE_SCALE '1' /* I don't think that a tcp send buffer of more than 10M is useful */ #define DRBD_SNDBUF_SIZE_MIN 0 #define DRBD_SNDBUF_SIZE_MAX (10<<20) #define DRBD_SNDBUF_SIZE_DEF 0 +#define DRBD_SNDBUF_SIZE_SCALE '1' #define DRBD_RCVBUF_SIZE_MIN 0 #define DRBD_RCVBUF_SIZE_MAX (10<<20) #define DRBD_RCVBUF_SIZE_DEF 0 +#define DRBD_RCVBUF_SIZE_SCALE '1' /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 #define DRBD_MAX_BUFFERS_MAX 131072 #define DRBD_MAX_BUFFERS_DEF 2048 +#define DRBD_MAX_BUFFERS_SCALE '1' /* @4k PageSize -> 4kB - 512MB */ #define DRBD_UNPLUG_WATERMARK_MIN 1 #define DRBD_UNPLUG_WATERMARK_MAX 131072 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) +#define DRBD_UNPLUG_WATERMARK_SCALE '1' /* 0 is disabled. 
* 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 #define DRBD_KO_COUNT_DEF 7 +#define DRBD_KO_COUNT_SCALE '1' /* } */ /* syncer { */ @@ -118,6 +134,7 @@ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 1237 +#define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -148,34 +165,42 @@ #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 +#define DRBD_MAX_BIO_BVECS_SCALE '1' #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 #define DRBD_C_PLAN_AHEAD_DEF 20 +#define DRBD_C_PLAN_AHEAD_SCALE '1' #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 #define DRBD_C_DELAY_TARGET_DEF 10 +#define DRBD_C_DELAY_TARGET_SCALE '1' #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ #define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ +#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */ -#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MIN 250 #define DRBD_C_MAX_RATE_MAX (4 << 20) #define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MIN 0 #define DRBD_C_MIN_RATE_MAX (4 << 20) #define DRBD_C_MIN_RATE_DEF 250 +#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */ #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ #define DRBD_CONG_FILL_DEF 0 +#define DRBD_CONG_FILL_SCALE 's' /* sectors */ #define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE #define DRBD_PROTOCOL_DEF DRBD_PROT_C From 0317d9ecbc9bac43642b4aa70e3e1106f4fd26a1 Mon Sep 17 00:00:00 2001 
From: Andreas Gruenbacher Date: Wed, 13 Jul 2011 13:40:30 +0200 Subject: [PATCH 423/609] drbd: Fix the maximum accepted minor device number The maximum minor device number allowed by the kernel is ((1 << 20) - 1). Reject device numbers higher than that to catch possible errors earlier. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index cd3565cfed4..7d956e91ae7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -137,7 +137,7 @@ #define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 -#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1) #define DRBD_MINOR_NUMBER_DEF -1 #define DRBD_MINOR_NUMBER_SCALE '1' From f2257a56eeb4afb3daea894baf93077af0579586 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 14 Jul 2011 16:00:40 +0200 Subject: [PATCH 424/609] drbd: Allow to create devices with a minor number > minor_count The minor_count module/kernel parameter serves to scale the size of drbd's internal memory pool, but it is no longer a limit for the number of minors or the minor number. (Minor numbers can be arbitrarily high within the allowed limit of 2^20.) 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c27b428b9a5..bae49bba1cc 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3034,8 +3034,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - /* FIXME drop minor_count parameter, limit to MINORMASK */ - if (dh->minor >= minor_count) { + if (dh->minor > MINORMASK) { drbd_msg_put_info("requested minor out of range"); retcode = ERR_INVALID_REQUEST; goto out; From 5af172ed9ef2c6b11e0d27c06647f14f6f8115b9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 15 Jul 2011 09:43:23 +0200 Subject: [PATCH 425/609] drbd: Print memory address in hex instead of decimal in error message Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d4e677c9c76..bfbc10352fa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1611,7 +1611,7 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, if (drbd_contains_interval(root, sector, &req->i) && req->i.local) return req; if (!missing_ok) { - dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func, + dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func, (unsigned long)id, (unsigned long long)sector); } return NULL; From b792c35cfb336b456e9e2455082ff12d4a76b98f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 15 Jul 2011 16:48:49 +0200 Subject: [PATCH 426/609] drbd: receive_protocol(): Give variables more easily searchable names Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 18 
+++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bfbc10352fa..11f2e7273e1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3002,7 +3002,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) int p_proto, p_discard_my_data, p_two_primaries, cf; struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; char integrity_alg[SHARED_SECRET_MAX] = ""; - struct crypto_hash *peer_tfm = NULL, *tfm = NULL; + struct crypto_hash *peer_integrity_tfm = NULL, *integrity_tfm = NULL; void *int_dig_in = NULL, *int_dig_vv = NULL; p_proto = be32_to_cpu(p->protocol); @@ -3028,15 +3028,15 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) if (integrity_alg[0]) { int hash_size; - peer_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (!(peer_tfm && tfm)) { + peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + if (!(peer_integrity_tfm && integrity_tfm)) { conn_err(tconn, "peer data-integrity-alg %s not supported\n", integrity_alg); goto disconnect; } - hash_size = crypto_hash_digestsize(tfm); + hash_size = crypto_hash_digestsize(integrity_tfm); int_dig_in = kmalloc(hash_size, GFP_KERNEL); int_dig_vv = kmalloc(hash_size, GFP_KERNEL); if (!(int_dig_in && int_dig_vv)) { @@ -3065,7 +3065,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) new_net_conf->integrity_alg_len = strlen(integrity_alg) + 1; crypto_free_hash(tconn->integrity_tfm); - tconn->integrity_tfm = tfm; + tconn->integrity_tfm = integrity_tfm; rcu_assign_pointer(tconn->net_conf, new_net_conf); mutex_unlock(&tconn->conf_update); @@ -3074,7 +3074,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct 
packet_info *pi) crypto_free_hash(tconn->peer_integrity_tfm); kfree(tconn->int_dig_in); kfree(tconn->int_dig_vv); - tconn->peer_integrity_tfm = peer_tfm; + tconn->peer_integrity_tfm = peer_integrity_tfm; tconn->int_dig_in = int_dig_in; tconn->int_dig_vv = int_dig_vv; @@ -3137,8 +3137,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) disconnect_rcu_unlock: rcu_read_unlock(); disconnect: - crypto_free_hash(peer_tfm); - crypto_free_hash(tfm); + crypto_free_hash(peer_integrity_tfm); + crypto_free_hash(integrity_tfm); kfree(int_dig_in); kfree(int_dig_vv); conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); From fbc12f4514279264b5ecd5ae24f94be08e108dcd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 15 Jul 2011 17:04:26 +0200 Subject: [PATCH 427/609] drbd: receive_protocol(): Make the program flow less confusing Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 91 +++++++++++++++--------------- 1 file changed, 44 insertions(+), 47 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 11f2e7273e1..06bbf0f79fc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3083,55 +3083,52 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) synchronize_rcu(); kfree(old_net_conf); + } else { + clear_bit(CONN_DRY_RUN, &tconn->flags); - return 0; + if (cf & CF_DRY_RUN) + set_bit(CONN_DRY_RUN, &tconn->flags); + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + + if (p_proto != nc->wire_protocol) { + conn_err(tconn, "incompatible communication protocols\n"); + goto disconnect_rcu_unlock; + } + + if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { + conn_err(tconn, "incompatible after-sb-0pri settings\n"); + goto disconnect_rcu_unlock; + } + + if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { + conn_err(tconn, "incompatible after-sb-1pri 
settings\n"); + goto disconnect_rcu_unlock; + } + + if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { + conn_err(tconn, "incompatible after-sb-2pri settings\n"); + goto disconnect_rcu_unlock; + } + + if (p_discard_my_data && nc->discard_my_data) { + conn_err(tconn, "both sides have the 'discard_my_data' flag set\n"); + goto disconnect_rcu_unlock; + } + + if (p_two_primaries != nc->two_primaries) { + conn_err(tconn, "incompatible setting of the two-primaries options\n"); + goto disconnect_rcu_unlock; + } + + if (strcmp(integrity_alg, nc->integrity_alg)) { + conn_err(tconn, "incompatible setting of the data-integrity-alg\n"); + goto disconnect_rcu_unlock; + } + + rcu_read_unlock(); } - - clear_bit(CONN_DRY_RUN, &tconn->flags); - - if (cf & CF_DRY_RUN) - set_bit(CONN_DRY_RUN, &tconn->flags); - - rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); - - if (p_proto != nc->wire_protocol) { - conn_err(tconn, "incompatible communication protocols\n"); - goto disconnect_rcu_unlock; - } - - if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { - conn_err(tconn, "incompatible after-sb-0pri settings\n"); - goto disconnect_rcu_unlock; - } - - if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { - conn_err(tconn, "incompatible after-sb-1pri settings\n"); - goto disconnect_rcu_unlock; - } - - if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { - conn_err(tconn, "incompatible after-sb-2pri settings\n"); - goto disconnect_rcu_unlock; - } - - if (p_discard_my_data && nc->discard_my_data) { - conn_err(tconn, "both sides have the 'discard_my_data' flag set\n"); - goto disconnect_rcu_unlock; - } - - if (p_two_primaries != nc->two_primaries) { - conn_err(tconn, "incompatible setting of the two-primaries options\n"); - goto disconnect_rcu_unlock; - } - - if (strcmp(integrity_alg, nc->integrity_alg)) { - conn_err(tconn, "incompatible setting of the data-integrity-alg\n"); - goto disconnect_rcu_unlock; - } - - rcu_read_unlock(); - return 0; 
disconnect_rcu_unlock: From d505d9bef2c86e80e6fba070a2fe459debb8b719 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 15 Jul 2011 17:19:18 +0200 Subject: [PATCH 428/609] drbd: Be consistent in reporting incompatibilities in P_PROTOCOL settings Refer to the settings by the names which drbdsetup and drbd.conf are using. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 06bbf0f79fc..aa674bf8a10 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3093,37 +3093,37 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) nc = rcu_dereference(tconn->net_conf); if (p_proto != nc->wire_protocol) { - conn_err(tconn, "incompatible communication protocols\n"); + conn_err(tconn, "incompatible %s settings\n", "protocol"); goto disconnect_rcu_unlock; } if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { - conn_err(tconn, "incompatible after-sb-0pri settings\n"); + conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri"); goto disconnect_rcu_unlock; } if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { - conn_err(tconn, "incompatible after-sb-1pri settings\n"); + conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri"); goto disconnect_rcu_unlock; } if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { - conn_err(tconn, "incompatible after-sb-2pri settings\n"); + conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri"); goto disconnect_rcu_unlock; } if (p_discard_my_data && nc->discard_my_data) { - conn_err(tconn, "both sides have the 'discard_my_data' flag set\n"); + conn_err(tconn, "incompatible %s settings\n", "discard-my-data"); goto disconnect_rcu_unlock; } if (p_two_primaries != nc->two_primaries) { - conn_err(tconn, "incompatible setting of the two-primaries 
options\n"); + conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries"); goto disconnect_rcu_unlock; } if (strcmp(integrity_alg, nc->integrity_alg)) { - conn_err(tconn, "incompatible setting of the data-integrity-alg\n"); + conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg"); goto disconnect_rcu_unlock; } From accdbcc5f94ec631aa49eb8efd14b3ddd0626191 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 15 Jul 2011 17:41:09 +0200 Subject: [PATCH 429/609] drbd: receive_protocol(): We cannot change our own data-integrity-alg setting here Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index aa674bf8a10..68a5abaf5ea 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3002,7 +3002,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) int p_proto, p_discard_my_data, p_two_primaries, cf; struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; char integrity_alg[SHARED_SECRET_MAX] = ""; - struct crypto_hash *peer_integrity_tfm = NULL, *integrity_tfm = NULL; + struct crypto_hash *peer_integrity_tfm = NULL; void *int_dig_in = NULL, *int_dig_vv = NULL; p_proto = be32_to_cpu(p->protocol); @@ -3028,15 +3028,23 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) if (integrity_alg[0]) { int hash_size; + /* + * We can only change the peer data integrity algorithm + * here. Changing our own data integrity algorithm + * requires that we send a P_PROTOCOL_UPDATE packet at + * the same time; otherwise, the peer has no way to + * tell between which packets the algorithm should + * change. 
+ */ + peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (!(peer_integrity_tfm && integrity_tfm)) { + if (!peer_integrity_tfm) { conn_err(tconn, "peer data-integrity-alg %s not supported\n", integrity_alg); goto disconnect; } - hash_size = crypto_hash_digestsize(integrity_tfm); + hash_size = crypto_hash_digestsize(peer_integrity_tfm); int_dig_in = kmalloc(hash_size, GFP_KERNEL); int_dig_vv = kmalloc(hash_size, GFP_KERNEL); if (!(int_dig_in && int_dig_vv)) { @@ -3064,9 +3072,6 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) strcpy(new_net_conf->integrity_alg, integrity_alg); new_net_conf->integrity_alg_len = strlen(integrity_alg) + 1; - crypto_free_hash(tconn->integrity_tfm); - tconn->integrity_tfm = integrity_tfm; - rcu_assign_pointer(tconn->net_conf, new_net_conf); mutex_unlock(&tconn->conf_update); mutex_unlock(&tconn->data.mutex); @@ -3079,7 +3084,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) tconn->int_dig_vv = int_dig_vv; if (strcmp(old_net_conf->integrity_alg, integrity_alg)) - conn_info(tconn, "peer data-integrity-alg: %s\n", integrity_alg); + conn_info(tconn, "peer data-integrity-alg: %s\n", + integrity_alg[0] ? integrity_alg : "(none)"); synchronize_rcu(); kfree(old_net_conf); @@ -3135,7 +3141,6 @@ disconnect_rcu_unlock: rcu_read_unlock(); disconnect: crypto_free_hash(peer_integrity_tfm); - crypto_free_hash(integrity_tfm); kfree(int_dig_in); kfree(int_dig_vv); conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); From 97af09d51ee1ca4a8687b4c3492a1739384d5a42 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 15 Jul 2011 12:14:27 +0200 Subject: [PATCH 430/609] drbd: Dropped wrong clause to generate new current UUIDs Looks like a remainder from long ago. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index eafc19532e1..1763357a065 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1239,11 +1239,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); - } - /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) /* We may still be Primary ourselves. From 3fb4746d8d3f9c3e48f7fc65ed24bc54bb8032aa Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 15 Jul 2011 18:44:26 +0200 Subject: [PATCH 431/609] drbd: Consider that the no-data-condition could be in connected state ...when the peer has inconsistent data. In that case we failed to clear the susp_nod flag. 
This happened when the local disk was attached again. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 1763357a065..60371ba90fb 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1180,7 +1180,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.conn < C_CONNECTED && conn_lowest_conn(mdev->tconn) >= C_CONNECTED) what = RESEND; - if (os.disk == D_ATTACHING && conn_lowest_disk(mdev->tconn) > D_ATTACHING) + if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && + conn_lowest_disk(mdev->tconn) > D_NEGOTIATING) what = RESTART_FROZEN_DISK_IO; if (what != NOTHING) { From 3b03ad5929e3ba5e1c3db7a70d426f883be061c2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 15 Jul 2011 13:53:06 +0200 Subject: [PATCH 432/609] drbd: Do not mod_timer() with a past time In case we cannot find out why the request takes too long (happens e.g. when IO got suspended on DRBD level), rearm the timer with a reasonable value.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8fa51cda3b7..d0f73b3a679 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1083,7 +1083,7 @@ void request_timer_fn(unsigned long data) struct drbd_request *req; /* oldest request */ struct list_head *le; struct net_conf *nc; - unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); @@ -1122,6 +1122,7 @@ void request_timer_fn(unsigned long data) __drbd_chk_io_error(mdev, 1); } } + nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et; spin_unlock_irq(&tconn->req_lock); - mod_timer(&mdev->request_timer, req->start_time + et); + mod_timer(&mdev->request_timer, nt); } From 9b743da96c8640dbfc864cb5d79c51547c3fadb4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 15 Jul 2011 18:15:45 +0200 Subject: [PATCH 433/609] drbd: Missing assignment of mdev before drbd_queue_work() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index dac8d9bc4be..9ee9b9fab7a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -790,6 +790,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); if (w) { w->cb = w_resync_finished; + w->mdev = mdev; drbd_queue_work(&mdev->tconn->data.work, w); return 1; } From 1b7ab15b11716d075b3dca34cf41e8d7aba3cba2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 15 Jul 2011 17:19:02 +0200 Subject: [PATCH 434/609] drbd: Fixed 
w_restart_disk_io() to handle non active AL-extents Since we now apply the AL in user space onto the bitmap, the AL is not active for the requests we want to replay. For that, an al_write_transaction() that may be called from worker context became necessary. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 70 ++++++++++++++++++++------------ drivers/block/drbd/drbd_worker.c | 4 -- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index da8ffd54fc1..5731d601951 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -112,7 +112,7 @@ struct drbd_atodb_wait { }; -static int w_al_write_transaction(struct drbd_work *, int); +static int al_write_transaction(struct drbd_conf *mdev); void *drbd_md_get_buffer(struct drbd_conf *mdev) { @@ -272,18 +272,13 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock.
*/ if (mdev->act_log->pending_changes) { - struct update_al_work al_work; - init_completion(&al_work.event); - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); - wait_for_completion(&al_work.event); - + int err; + err = al_write_transaction(mdev); mdev->al_writ_cnt++; spin_lock_irq(&mdev->al_lock); /* FIXME - if (al_work.err) + if (err) we need an "lc_cancel" here; */ lc_committed(mdev->act_log); @@ -348,23 +343,20 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) } static int -w_al_write_transaction(struct drbd_work *w, int unused) +_al_write_transaction(struct drbd_conf *mdev) { - struct update_al_work *aw = container_of(w, struct update_al_work, w); - struct drbd_conf *mdev = w->mdev; struct al_transaction_on_disk *buffer; struct lc_element *e; sector_t sector; int i, mx; unsigned extent_nr; unsigned crc = 0; + int err = 0; if (!get_ldev(mdev)) { dev_err(DEV, "disk is %s, cannot start al transaction\n", drbd_disk_str(mdev->state.disk)); - aw->err = -EIO; - complete(&((struct update_al_work *)w)->event); - return 0; + return -EIO; } /* The bitmap write may have failed, causing a state change. */ @@ -372,19 +364,15 @@ w_al_write_transaction(struct drbd_work *w, int unused) dev_err(DEV, "disk is %s, cannot write al transaction\n", drbd_disk_str(mdev->state.disk)); - aw->err = -EIO; - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); - return 0; + return -EIO; } buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... 
*/ if (!buffer) { dev_err(DEV, "disk failed while waiting for md_io buffer\n"); - aw->err = -EIO; - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); - return 1; + return -ENODEV; } memset(buffer, 0, sizeof(*buffer)); @@ -444,10 +432,10 @@ w_al_write_transaction(struct drbd_work *w, int unused) buffer->crc32c = cpu_to_be32(crc); if (drbd_bm_write_hinted(mdev)) - aw->err = -EIO; + err = -EIO; /* drbd_chk_io_error done already */ else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { - aw->err = -EIO; + err = -EIO; drbd_chk_io_error(mdev, 1, true); } else { /* advance ringbuffer position and transaction counter */ @@ -456,10 +444,42 @@ w_al_write_transaction(struct drbd_work *w, int unused) } drbd_md_put_buffer(mdev); - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); - return 0; + return err; +} + + +static int w_al_write_transaction(struct drbd_work *w, int unused) +{ + struct update_al_work *aw = container_of(w, struct update_al_work, w); + struct drbd_conf *mdev = w->mdev; + int err; + + err = _al_write_transaction(mdev); + aw->err = err; + complete(&aw->event); + + return err != -EIO ? err : 0; +} + +/* Calls from worker context (see w_restart_disk_io()) need to write the + transaction directly. Others came through generic_make_request(), + those need to delegate it to the worker. 
*/ +static int al_write_transaction(struct drbd_conf *mdev) +{ + struct update_al_work al_work; + + if (current == mdev->tconn->worker.task) + return _al_write_transaction(mdev); + + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + return al_work.err; } static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9ee9b9fab7a..c57e47c0a1f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1333,10 +1333,6 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) drbd_al_begin_io(mdev, &req->i); - /* Calling drbd_al_begin_io() out of the worker might deadlocks - theoretically. Practically it can not deadlock, since this is - only used when unfreezing IOs. All the extents of the requests - that made it into the TL are already active */ drbd_req_make_private_bio(req, req->master_bio); req->private_bio->bi_bdev = mdev->ldev->backing_bdev; From 71fc7eedb37585ab2f1bec2e615202908bd4f4b7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 17 Jul 2011 23:06:12 +0200 Subject: [PATCH 435/609] drbd: Turn tl_apply() into tl_abort_disk_io() There is no need to overly generalize this function; it only makes the code harder to understand. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 13 ++++--------- drivers/block/drbd/drbd_state.c | 4 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f1d696ab6e8..13793503766 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -484,28 +484,23 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) } /** - * tl_apply() - Applies an event to all requests for a certain mdev in the TL + * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL * @mdev: DRBD device. - * @what: The action/event to perform with all request objects - * - * @what might ony be ABORT_DISK_IO. */ -void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what) +void tl_abort_disk_io(struct drbd_conf *mdev) { struct drbd_tconn *tconn = mdev->tconn; struct drbd_tl_epoch *b; struct list_head *le, *tle; struct drbd_request *req; - D_ASSERT(what == ABORT_DISK_IO); - spin_lock_irq(&tconn->req_lock); b = tconn->oldest_tle; while (b) { list_for_each_safe(le, tle, &b->requests) { req = list_entry(le, struct drbd_request, tl_requests); if (req->w.mdev == mdev) - _req_mod(req, what); + _req_mod(req, ABORT_DISK_IO); } b = b->next; } @@ -513,7 +508,7 @@ void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what) list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { req = list_entry(le, struct drbd_request, tl_requests); if (req->w.mdev == mdev) - _req_mod(req, what); + _req_mod(req, ABORT_DISK_IO); } spin_unlock_irq(&tconn->req_lock); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 60371ba90fb..1132d87fa28 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -30,7 +30,7 @@ #include "drbd_req.h" /* in drbd_main.c */ -extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what); +extern void 
tl_abort_disk_io(struct drbd_conf *mdev); struct after_state_chg_work { struct drbd_work w; @@ -1319,7 +1319,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Immediately allow completion of all application IO, that waits for completion from the local disk. */ - tl_apply(mdev, ABORT_DISK_IO); + tl_abort_disk_io(mdev); /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, From 7d4c782cbda4af0d7dc39cb8e7d50a927781aa1f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 17 Jul 2011 23:06:12 +0200 Subject: [PATCH 436/609] drbd: Fix the data-integrity-alg setting The last data-integrity-alg fix made data integrity checking work when the algorithm was changed for an established connection, but the common case of configuring the algorithm before connecting was still broken. Fix that. Signed-off-by: Andreas Gruenbacher Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 6 +- drivers/block/drbd/drbd_nl.c | 20 ----- drivers/block/drbd/drbd_receiver.c | 130 ++++++++++++++--------------- 3 files changed, 66 insertions(+), 90 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 13793503766..c941d3a2b30 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1775,8 +1775,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) sock = &mdev->tconn->data; p = drbd_prepare_command(mdev, sock); - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ? - crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; + dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; if (!p) return -EIO; @@ -1849,8 +1848,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, sock = &mdev->tconn->data; p = drbd_prepare_command(mdev, sock); - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_tfm) ? 
- crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; + dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; if (!p) return -EIO; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bae49bba1cc..853e0a2873f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1824,8 +1824,6 @@ struct crypto { struct crypto_hash *csums_tfm; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; - void *int_dig_in; - void *int_dig_vv; }; static int @@ -1848,7 +1846,6 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) { char hmac_name[CRYPTO_MAX_ALG_NAME]; enum drbd_ret_code rv; - int hash_size; rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg, ERR_CSUMS_ALG); @@ -1869,23 +1866,12 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name, ERR_AUTH_ALG); } - if (crypto->integrity_tfm) { - hash_size = crypto_hash_digestsize(crypto->integrity_tfm); - crypto->int_dig_in = kmalloc(hash_size, GFP_KERNEL); - if (!crypto->int_dig_in) - return ERR_NOMEM; - crypto->int_dig_vv = kmalloc(hash_size, GFP_KERNEL); - if (!crypto->int_dig_vv) - return ERR_NOMEM; - } return rv; } static void free_crypto(struct crypto *crypto) { - kfree(crypto->int_dig_in); - kfree(crypto->int_dig_vv); crypto_free_hash(crypto->cram_hmac_tfm); crypto_free_hash(crypto->integrity_tfm); crypto_free_hash(crypto->csums_tfm); @@ -1974,10 +1960,6 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) crypto.verify_tfm = NULL; } - kfree(tconn->int_dig_in); - tconn->int_dig_in = crypto.int_dig_in; - kfree(tconn->int_dig_vv); - tconn->int_dig_vv = crypto.int_dig_vv; crypto_free_hash(tconn->integrity_tfm); tconn->integrity_tfm = crypto.integrity_tfm; if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100) @@ -2094,8 +2076,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) 
rcu_assign_pointer(tconn->net_conf, new_conf); conn_free_crypto(tconn); - tconn->int_dig_in = crypto.int_dig_in; - tconn->int_dig_vv = crypto.int_dig_vv; tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; tconn->integrity_tfm = crypto.integrity_tfm; tconn->csums_tfm = crypto.csums_tfm; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 68a5abaf5ea..8d521219480 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3024,72 +3024,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) integrity_alg[SHARED_SECRET_MAX - 1] = 0; } - if (pi->cmd == P_PROTOCOL_UPDATE) { - if (integrity_alg[0]) { - int hash_size; - - /* - * We can only change the peer data integrity algorithm - * here. Changing our own data integrity algorithm - * requires that we send a P_PROTOCOL_UPDATE packet at - * the same time; otherwise, the peer has no way to - * tell between which packets the algorithm should - * change. - */ - - peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (!peer_integrity_tfm) { - conn_err(tconn, "peer data-integrity-alg %s not supported\n", - integrity_alg); - goto disconnect; - } - - hash_size = crypto_hash_digestsize(peer_integrity_tfm); - int_dig_in = kmalloc(hash_size, GFP_KERNEL); - int_dig_vv = kmalloc(hash_size, GFP_KERNEL); - if (!(int_dig_in && int_dig_vv)) { - conn_err(tconn, "Allocation of buffers for data integrity checking failed\n"); - goto disconnect; - } - } - - new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); - if (!new_net_conf) { - conn_err(tconn, "Allocation of new net_conf failed\n"); - goto disconnect; - } - - mutex_lock(&tconn->data.mutex); - mutex_lock(&tconn->conf_update); - old_net_conf = tconn->net_conf; - *new_net_conf = *old_net_conf; - - new_net_conf->wire_protocol = p_proto; - new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p); - new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p); - 
new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p); - new_net_conf->two_primaries = p_two_primaries; - strcpy(new_net_conf->integrity_alg, integrity_alg); - new_net_conf->integrity_alg_len = strlen(integrity_alg) + 1; - - rcu_assign_pointer(tconn->net_conf, new_net_conf); - mutex_unlock(&tconn->conf_update); - mutex_unlock(&tconn->data.mutex); - - crypto_free_hash(tconn->peer_integrity_tfm); - kfree(tconn->int_dig_in); - kfree(tconn->int_dig_vv); - tconn->peer_integrity_tfm = peer_integrity_tfm; - tconn->int_dig_in = int_dig_in; - tconn->int_dig_vv = int_dig_vv; - - if (strcmp(old_net_conf->integrity_alg, integrity_alg)) - conn_info(tconn, "peer data-integrity-alg: %s\n", - integrity_alg[0] ? integrity_alg : "(none)"); - - synchronize_rcu(); - kfree(old_net_conf); - } else { + if (pi->cmd != P_PROTOCOL_UPDATE) { clear_bit(CONN_DRY_RUN, &tconn->flags); if (cf & CF_DRY_RUN) @@ -3135,6 +3070,69 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) rcu_read_unlock(); } + + if (integrity_alg[0]) { + int hash_size; + + /* + * We can only change the peer data integrity algorithm + * here. Changing our own data integrity algorithm + * requires that we send a P_PROTOCOL_UPDATE packet at + * the same time; otherwise, the peer has no way to + * tell between which packets the algorithm should + * change. 
+ */ + + peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + if (!peer_integrity_tfm) { + conn_err(tconn, "peer data-integrity-alg %s not supported\n", + integrity_alg); + goto disconnect; + } + + hash_size = crypto_hash_digestsize(peer_integrity_tfm); + int_dig_in = kmalloc(hash_size, GFP_KERNEL); + int_dig_vv = kmalloc(hash_size, GFP_KERNEL); + if (!(int_dig_in && int_dig_vv)) { + conn_err(tconn, "Allocation of buffers for data integrity checking failed\n"); + goto disconnect; + } + } + + new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_net_conf) { + conn_err(tconn, "Allocation of new net_conf failed\n"); + goto disconnect; + } + + mutex_lock(&tconn->data.mutex); + mutex_lock(&tconn->conf_update); + old_net_conf = tconn->net_conf; + *new_net_conf = *old_net_conf; + + new_net_conf->wire_protocol = p_proto; + new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p); + new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p); + new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p); + new_net_conf->two_primaries = p_two_primaries; + + rcu_assign_pointer(tconn->net_conf, new_net_conf); + mutex_unlock(&tconn->conf_update); + mutex_unlock(&tconn->data.mutex); + + crypto_free_hash(tconn->peer_integrity_tfm); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + tconn->peer_integrity_tfm = peer_integrity_tfm; + tconn->int_dig_in = int_dig_in; + tconn->int_dig_vv = int_dig_vv; + + if (strcmp(old_net_conf->integrity_alg, integrity_alg)) + conn_info(tconn, "peer data-integrity-alg: %s\n", + integrity_alg[0] ? 
integrity_alg : "(none)"); + + synchronize_rcu(); + kfree(old_net_conf); return 0; disconnect_rcu_unlock: From 823bd832a60dcf4bf9c162112f34f2f1783d0eaa Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 8 Nov 2012 15:04:36 +0100 Subject: [PATCH 437/609] drbd: Bugfix for the connection behavior If we get into the C_BROKEN_PIPE cstate once, the state engine sets the thi->t_state of the receiver thread to restarting. But with the while loop in drbdd_init() a new connection gets established. After that, the call into drbdd() returns immediately since thi->t_state is not RUNNING. The restart of drbdd_init() then resets thi->t_state to RUNNING. I.e. after entering C_BROKEN_PIPE once, the next successfully established connection gets wasted. The two parts of the fix: * Do not cause the thread to restart if we detect the issue with the sockets while we are in C_WF_CONNECTION. * Make sure that all actions that would have set us to C_BROKEN_PIPE happen before the state change to C_WF_REPORT_PARAMS.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 +++++----- drivers/block/drbd/drbd_state.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8d521219480..fff55657e0d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -987,14 +987,9 @@ retry: } } - if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) - return 0; - sock->sk->sk_sndtimeo = timeout; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - drbd_thread_start(&tconn->asender); - if (drbd_send_protocol(tconn) == -EOPNOTSUPP) return -1; @@ -1008,6 +1003,11 @@ retry: } rcu_read_unlock(); + if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) + return 0; + + drbd_thread_start(&tconn->asender); + return h; out_release_sockets: diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 1132d87fa28..ecc5e276166 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1055,7 +1055,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, drbd_thread_stop_nowait(&mdev->tconn->receiver); /* Upon network failure, we need to restart the receiver. 
*/ - if (os.conn > C_TEAR_DOWN && + if (os.conn > C_WF_CONNECTION && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) drbd_thread_restart_nowait(&mdev->tconn->receiver); From f497609e4c50c6162b51359d560f2454b184e0ec Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 17 Jul 2011 23:06:12 +0200 Subject: [PATCH 438/609] drbd: Get rid of MR_{READ,WRITE}_SHIFT Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 6 ++++-- drivers/block/drbd/drbd_req.h | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c941d3a2b30..2090b78231a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -368,8 +368,10 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) req = list_entry(le, struct drbd_request, tl_requests); rv = _req_mod(req, what); - n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT; - n_reads += (rv & MR_READ) >> MR_READ_SHIFT; + if (rv & MR_WRITE) + n_writes++; + if (rv & MR_READ) + n_reads++; } tmp = b->next; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index f6aff150add..68f54050b7c 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -235,10 +235,8 @@ enum drbd_req_state_bits { /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ -#define MR_WRITE_SHIFT 0 -#define MR_WRITE (1 << MR_WRITE_SHIFT) -#define MR_READ_SHIFT 1 -#define MR_READ (1 << MR_READ_SHIFT) +#define MR_WRITE 1 +#define MR_READ 2 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) { From 97ddb68790891ed568ab8212189f5173dd600ea4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 15 Jul 2011 23:52:44 +0200 Subject: [PATCH 439/609] drbd: detach must not try to abort non-local requests Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- 
drivers/block/drbd/drbd_main.c | 4 ++++ drivers/block/drbd/drbd_req.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2090b78231a..edd0227f4b4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -501,6 +501,8 @@ void tl_abort_disk_io(struct drbd_conf *mdev) while (b) { list_for_each_safe(le, tle, &b->requests) { req = list_entry(le, struct drbd_request, tl_requests); + if (!(req->rq_state & RQ_LOCAL_PENDING)) + continue; if (req->w.mdev == mdev) _req_mod(req, ABORT_DISK_IO); } @@ -509,6 +511,8 @@ void tl_abort_disk_io(struct drbd_conf *mdev) list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { req = list_entry(le, struct drbd_request, tl_requests); + if (!(req->rq_state & RQ_LOCAL_PENDING)) + continue; if (req->w.mdev == mdev) _req_mod(req, ABORT_DISK_IO); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d0f73b3a679..60fc186d0a3 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -436,6 +436,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ + D_ASSERT(drbd_interval_empty(&req->i)); drbd_insert_interval(&mdev->read_requests, &req->i); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -453,6 +454,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* from __drbd_make_request only */ /* corresponding hlist_del is in _req_may_be_done() */ + D_ASSERT(drbd_interval_empty(&req->i)); drbd_insert_interval(&mdev->write_requests, &req->i); /* NOTE From f132f554cea3ad1333ff8ea1ac23e1085abd639e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 18 Jul 2011 10:44:24 +0200 Subject: [PATCH 440/609] drbd: Do not display bogus log lines for pdsk in case pdsk < D_UNKNOWN This was a regression recently introduced with commit 7848ddb752c09b6dfd1ddfabb06b69b08aa8f6b9 
"drbd: Correctly handle resources without volumes" Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ecc5e276166..9aefca69e4e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1576,13 +1576,7 @@ void conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) { - union drbd_state ns, os, ns_max = { - { .role = R_SECONDARY, - .peer = R_UNKNOWN, - .conn = val.conn, - .disk = D_DISKLESS, - .pdsk = D_UNKNOWN - } }; + union drbd_state ns, os, ns_max = { }; union drbd_state ns_min = { { .role = R_MASK, .peer = R_MASK, @@ -1592,13 +1586,14 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state } }; struct drbd_conf *mdev; enum drbd_state_rv rv; - int vnr; + int vnr, number_of_volumes = 0; if (mask.conn == C_MASK) tconn->cstate = val.conn; rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { + number_of_volumes++; os = drbd_read_state(mdev); ns = apply_mask_val(os, mask, val); ns = sanitize_state(mdev, ns, NULL); @@ -1625,6 +1620,16 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state } rcu_read_unlock(); + if (number_of_volumes == 0) { + ns_min = ns_max = (union drbd_state) { { + .role = R_SECONDARY, + .peer = R_UNKNOWN, + .conn = val.conn, + .disk = D_DISKLESS, + .pdsk = D_UNKNOWN + } }; + } + ns_min.susp = ns_max.susp = tconn->susp; ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod; ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen; From b80c043327ea4faac62a329a1d35f16c47a5128e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 18 Jul 2011 11:09:17 +0200 Subject: [PATCH 441/609] drbd: The minor_count module parameter is only a hint nowadays 
* The max of minor_count is 255 * In drbdadm count the number of minors, instead of finding the highest minor number * No longer use the magic in the init script Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7d956e91ae7..6d0a24331ed 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX (1U << 20) +#define DRBD_MINOR_COUNT_MAX 255 #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_MINOR_COUNT_SCALE '1' From 85d735138a6ee343fe689e308163cf48bb54d162 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 18 Jul 2011 15:45:15 +0200 Subject: [PATCH 442/609] drbd: Cleanup all epoch objects upon connection loss Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fff55657e0d..cd140bc0b56 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1125,13 +1125,14 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, if (epoch_size != 0 && atomic_read(&epoch->active) == 0 && - test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) { + (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { if (!(ev & EV_CLEANUP)) { spin_unlock(&mdev->epoch_lock); drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); spin_lock(&mdev->epoch_lock); } - dec_unacked(mdev); + if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) + dec_unacked(mdev); if (mdev->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); From 4276dea70cc8e3ed22c567b0b5e683602769ce03 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date:
Thu, 16 Jun 2011 17:58:02 +0200 Subject: [PATCH 443/609] drbd: Remove dead code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 5731d601951..34048e8441d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -104,14 +104,6 @@ struct update_al_work { int err; }; -struct drbd_atodb_wait { - atomic_t count; - struct completion io_done; - struct drbd_conf *mdev; - int error; -}; - - static int al_write_transaction(struct drbd_conf *mdev); void *drbd_md_get_buffer(struct drbd_conf *mdev) From c4e7afdc01b9496f81653959dac8093030c8f286 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 16 Aug 2011 11:59:43 +0200 Subject: [PATCH 444/609] drbd: Remove unused code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 853e0a2873f..92adb3dc82c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2388,15 +2388,6 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_conf *mdev) -{ - int rv; - - rv = drbd_bmio_set_n_write(mdev); - drbd_suspend_al(mdev); - return rv; -} - static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, union drbd_state mask, union drbd_state val) { From a209b4aec31d4b672b7a70f5de272ebf6ce40e1b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 17 Aug 2011 12:43:25 +0200 Subject: [PATCH 445/609] drbd: Update some outdated comments to match the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_req.c | 7 +++---- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 7 
insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 706e5220dd4..092f8273e6b 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -389,8 +389,8 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) /* Trying kmalloc first, falling back to vmalloc. * GFP_KERNEL is ok, as this is done when a lower level disk is - * "attached" to the drbd. Context is receiver thread or cqueue - * thread. As we have no disk yet, we are not in the IO path, + * "attached" to the drbd. Context is receiver thread or drbdsetup / + * netlink process. As we have no disk yet, we are not in the IO path, * not even the IO path of the peer. */ bytes = sizeof(struct page *)*want; new_pages = kmalloc(bytes, GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 92adb3dc82c..684e2e4d48e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2038,7 +2038,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } - /* allocation not in the IO path, cqueue thread context */ + /* allocation not in the IO path, drbdsetup / netlink process context */ new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 60fc186d0a3..d61309db14a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -292,13 +292,12 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, - * or protocol C P_WRITE_ACK, - * or protocol A or B P_BARRIER_ACK, + * or protocol A, B, or C P_BARRIER_ACK, * or killed from the transfer log due to connection loss. */ _req_is_done(mdev, req, rw); } /* else: network part and not DONE yet. 
that is - * protocol A or B, barrier ack still pending... */ + * protocol A, B, or C, barrier ack still pending... */ } static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m) @@ -668,7 +667,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; if (req->rq_state & RQ_NET_PENDING) { - /* barrier came in before all requests have been acked. + /* barrier came in before all requests were acked. * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c57e47c0a1f..34a6065d95e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -969,7 +969,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) } /** - * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS + * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST * @mdev: DRBD device. * @w: work object. * @cancel: The connection will be closed anyways From 25e409321a950b8888830c6ddb7a9774ec38f608 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 19 Aug 2011 10:39:00 +0200 Subject: [PATCH 446/609] drbd: fix connect failure with all default net-options If no net-options are configured (all on their default), no DRBD_NLA_NET_CONF will be passed to the kernel. The kernel must not require its presence, there is no required option in there. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 684e2e4d48e..e64b1c897c8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2048,7 +2048,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) set_net_conf_defaults(new_conf); err = net_conf_from_attrs(new_conf, info); - if (err) { + if (err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; From 935be260c1e92a1d9c251973d866b68bf2c20f12 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 19 Aug 2011 13:47:31 +0200 Subject: [PATCH 447/609] drbd: Improve error reporting in drbd_md_sync_page_io() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 34048e8441d..4dd52088f24 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -202,8 +202,8 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); if (err) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", - (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); + dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", + (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ", err); } return err; } From 08b165ba11ebdb6b037a6071ae599f1d8c93807e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 5 Sep 2011 16:22:33 +0200 Subject: [PATCH 448/609] drbd: Consider the discard-my-data flag for all volumes [bugz 359] ...not only for the first volume Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9d0d6d0fb82..ccf28e986d4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -697,6 +697,7 @@ enum { AL_SUSPENDED, /* Activity logging is currently suspended. */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ B_RS_H_DONE, /* Before resync handler done (already executed) */ + DISCARD_MY_DATA, /* discard_my_data flag per volume */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cd140bc0b56..4004a682d0e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -846,6 +846,7 @@ static int conn_connect(struct drbd_tconn *tconn) struct drbd_conf *mdev; struct net_conf *nc; int vnr, timeout, try, h, ok; + bool discard_my_data; if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; @@ -960,6 +961,7 @@ retry: msock->sk->sk_rcvtimeo = nc->ping_int*HZ; timeout = nc->timeout * HZ / 10; + discard_my_data = nc->discard_my_data; rcu_read_unlock(); msock->sk->sk_sndtimeo = timeout; @@ -997,6 +999,12 @@ retry: idr_for_each_entry(&tconn->volumes, mdev, vnr) { kref_get(&mdev->kref); rcu_read_unlock(); + + if (discard_my_data) + set_bit(DISCARD_MY_DATA, &mdev->flags); + else + clear_bit(DISCARD_MY_DATA, &mdev->flags); + drbd_connected(mdev); kref_put(&mdev->kref, &drbd_minor_destroy); rcu_read_lock(); @@ 
-1008,6 +1016,14 @@ retry: drbd_thread_start(&tconn->asender); + mutex_lock(&tconn->conf_update); + /* The discard_my_data flag is a single-shot modifier to the next + * connection attempt, the handshake of which is now well underway. + * No need for rcu style copying of the whole struct + * just to clear a single value. */ + tconn->net_conf->discard_my_data = 0; + mutex_unlock(&tconn->conf_update); + return h; out_release_sockets: @@ -2906,9 +2922,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (nc->discard_my_data && !(mdev->p_uuid[UI_FLAGS]&1)) + if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!nc->discard_my_data && (mdev->p_uuid[UI_FLAGS]&1)) + if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -3856,9 +3872,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } } - mutex_lock(&mdev->tconn->conf_update); - mdev->tconn->net_conf->discard_my_data = 0; /* without copy; single bit op is atomic */ - mutex_unlock(&mdev->tconn->conf_update); + clear_bit(DISCARD_MY_DATA, &mdev->flags); drbd_md_sync(mdev); /* update connected indicator, la_size, ... 
*/ From 22d81140aea85f9ac388fa12768dc502ef00eaae Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Sep 2011 09:58:04 +0200 Subject: [PATCH 449/609] drbd: fix bitmap writeout after aborted resync Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 092f8273e6b..fcbc5e1ca50 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1448,13 +1448,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, { int i; int bits; + int changed = 0; unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1); for (i = first_word; i < last_word; i++) { bits = hweight_long(paddr[i]); paddr[i] = ~0UL; - b->bm_set += BITS_PER_LONG - bits; + changed += BITS_PER_LONG - bits; } kunmap_atomic(paddr, KM_IRQ1); + if (changed) { + /* We only need lazy writeout, the information is still in the + * remote bitmap as well, and is reconstructed during the next + * bitmap exchange, if lost locally due to a crash. */ + bm_set_page_lazy_writeout(b->bm_pages[page_nr]); + b->bm_set += changed; + } } /* Same thing as drbd_bm_set_bits, From 9bcd2521827bb7c418a83a77474c449d6496d55c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 29 Sep 2011 13:00:14 +0200 Subject: [PATCH 450/609] drbd: fix "stalled" empty resync With sync-after dependencies, given "lucky" timing of pause/unpause events, the end of an empty (0 bits set) resync was sometimes not detected on the SyncTarget, leading to a "stalled" SyncSource state. Fixed this by expecting not only "Inconsistent -> UpToDate" but also "Consistent -> UpToDate" transitions for the peer disk state to end a resync.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 ++++++-- drivers/block/drbd/drbd_receiver.c | 11 ++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e64b1c897c8..16c3710e1b9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2262,7 +2262,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) { retcode = ERR_NEED_APV_93; - goto fail; + goto fail_ldev; } rcu_read_lock(); @@ -2272,7 +2272,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL); if (!new_disk_conf) { retcode = ERR_NOMEM; - goto fail; + goto fail_ldev; } } @@ -2310,6 +2310,10 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) fail: drbd_adm_finish(info, retcode); return 0; + + fail_ldev: + put_ldev(mdev); + goto fail; } int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4004a682d0e..aba04d7dadf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3748,9 +3748,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) os = ns = drbd_read_state(mdev); spin_unlock_irq(&mdev->tconn->req_lock); - /* peer says his disk is uptodate, while we think it is inconsistent, - * and this happens while we think we have a sync going on. */ - if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE && + /* If this is the "end of sync" confirmation, usually the peer disk + * transitions from D_INCONSISTENT to D_UP_TO_DATE. 
For empty (0 bits + * set) resync started in PausedSyncT, or if the timing of pause-/ + * unpause-sync events has been "just right", the peer disk may + * transition from D_CONSISTENT to D_UP_TO_DATE as well. + */ + if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && + real_peer_disk == D_UP_TO_DATE && os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { /* If we are (becoming) SyncSource, but peer is still in sync * preparation, ignore its uptodate-ness to avoid flapping, it From 376694a0543f5da194d9c6e102cfcd023ffac949 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Nov 2011 10:54:28 +0100 Subject: [PATCH 451/609] drbd: Silenced compiler warnings Since version 4.6.1 gcc warns about variables that get a value assigned, but which are never read later on. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 11 +++-------- drivers/block/drbd/drbd_receiver.c | 13 ++----------- drivers/block/drbd/drbd_req.c | 2 ++ drivers/block/drbd/drbd_state.c | 9 --------- 4 files changed, 7 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 4dd52088f24..ade79be4e27 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -264,8 +264,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. 
*/ if (mdev->act_log->pending_changes) { - int err; - err = al_write_transaction(mdev); + al_write_transaction(mdev); mdev->al_writ_cnt++; spin_lock_irq(&mdev->al_lock); @@ -290,7 +289,6 @@ void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) unsigned enr; struct lc_element *extent; unsigned long flags; - bool wake = false; spin_lock_irqsave(&mdev->al_lock, flags); @@ -300,8 +298,7 @@ void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); continue; } - if (lc_put(mdev->act_log, extent) == 0) - wake = true; + lc_put(mdev->act_log, extent); } spin_unlock_irqrestore(&mdev->al_lock, flags); wake_up(&mdev->al_wait); @@ -730,7 +727,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line) { - unsigned long sbnr, ebnr, lbnr, flags; + unsigned long sbnr, ebnr, flags; sector_t esector, nr_sectors; unsigned int enr, count = 0; struct lc_element *e; @@ -752,8 +749,6 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, if (!expect(esector < nr_sectors)) esector = nr_sectors - 1; - lbnr = BM_SECT_TO_BIT(nr_sectors-1); - /* we set it out of sync, * we do not need to round anything here */ sbnr = BM_SECT_TO_BIT(sector); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index aba04d7dadf..e823968f1cd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4324,7 +4324,7 @@ static void conn_disconnect(struct drbd_tconn *tconn) { struct drbd_conf *mdev; enum drbd_conns oc; - int vnr, rv = SS_UNKNOWN_ERROR; + int vnr; if (tconn->cstate == C_STANDALONE) return; @@ -4351,7 +4351,7 @@ static void conn_disconnect(struct drbd_tconn *tconn) spin_lock_irq(&tconn->req_lock); oc = tconn->cstate; if (oc >= C_UNCONNECTED) - rv = 
_conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); spin_unlock_irq(&tconn->req_lock); @@ -4361,7 +4361,6 @@ static void conn_disconnect(struct drbd_tconn *tconn) static int drbd_disconnected(struct drbd_conf *mdev) { - enum drbd_fencing_p fp; unsigned int i; /* wait for current activity to cease. */ @@ -4405,14 +4404,6 @@ static int drbd_disconnected(struct drbd_conf *mdev) drbd_md_sync(mdev); - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - rcu_read_lock(); - fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; - rcu_read_unlock(); - put_ldev(mdev); - } - /* serialize with bitmap writeout triggered by the state change, * if any. */ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d61309db14a..42bdf36cef1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -39,6 +39,8 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req cpu = part_stat_lock(); part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + (void) cpu; /* The macro invocations above want the cpu argument, I do not like + the compiler warning about cpu only assigned but never used... 
*/ part_inc_in_flight(&mdev->vdisk->part0, rw); part_stat_unlock(); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 9aefca69e4e..4075bd2d251 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1143,7 +1143,6 @@ int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { - enum drbd_fencing_p fp; struct sib_info sib; sib.sib_reason = SIB_STATE_CHANGE; @@ -1156,14 +1155,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->p_uuid[UI_FLAGS] &= ~((u64)2); } - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - rcu_read_lock(); - fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; - rcu_read_unlock(); - put_ldev(mdev); - } - /* Inform userspace about the change... */ drbd_bcast_event(mdev, &sib); From 0cfac5dd904ec8b376beb27f6ad265b12d71bf9e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Nov 2011 12:12:52 +0100 Subject: [PATCH 452/609] drbd: Fixes from the drbd-8.3 branch * drbd-8.3: drbd: fix spurious meta data IO "error" drbd: Fixed a race condition between detach and start of resync drbd: fix harmless race to not trigger an ASSERT drbd: Derive sync-UUIDs only from the bitmap-uuid if it is non-zero drbd: Fixed current UUID generation (regression introduced recently, after 8.3.11) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 6 +++++- drivers/block/drbd/drbd_nl.c | 2 ++ drivers/block/drbd/drbd_state.c | 5 +++++ drivers/block/drbd/drbd_worker.c | 16 ++++++++++++++-- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index edd0227f4b4..822fb3d4235 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1087,7 +1087,11 @@ void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) 
D_ASSERT(mdev->state.disk == D_UP_TO_DATE); - uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; + uuid = mdev->ldev->md.uuid[UI_BITMAP]; + if (uuid && uuid != UUID_JUST_CREATED) + uuid = uuid + UUID_NEW_BM_OFFSET; + else + get_random_bytes(&uuid, sizeof(u64)); drbd_uuid_set(mdev, UI_BITMAP, uuid); drbd_print_uuids(mdev, "updated sync UUID"); drbd_md_sync(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 16c3710e1b9..c50c1753aa9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1666,7 +1666,9 @@ static int adm_detach(struct drbd_conf *mdev, int force) } drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ + drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */ retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); + drbd_md_put_buffer(mdev); /* D_FAILED will transition to DISKLESS. */ ret = wait_event_interruptible(mdev->misc_wait, mdev->state.disk != D_FAILED); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4075bd2d251..dffc6973e2b 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1231,6 +1231,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { + if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) /* We may still be Primary ourselves. 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 34a6065d95e..bc2cfd6c600 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -74,10 +74,21 @@ void drbd_md_io_complete(struct bio *bio, int error) md_io->error = error; + /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able + * to timeout on the lower level device, and eventually detach from it. + * If this io completion runs after that timeout expired, this + * drbd_md_put_buffer() may allow us to finally try and re-attach. + * During normal operation, this only puts that extra reference + * down to 1 again. + * Make sure we first drop the reference, and only then signal + * completion, or we may (in drbd_al_read_log()) cycle so fast into the + * next drbd_md_sync_page_io(), that we trigger the + * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there. + */ + drbd_md_put_buffer(mdev); md_io->done = 1; wake_up(&mdev->misc_wait); bio_put(bio); - drbd_md_put_buffer(mdev); put_ldev(mdev); } @@ -1581,12 +1592,13 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } clear_bit(B_RS_H_DONE, &mdev->flags); + write_lock_irq(&global_state_lock); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { + write_unlock_irq(&global_state_lock); mutex_unlock(mdev->state_mutex); return; } - write_lock_irq(&global_state_lock); ns = drbd_read_state(mdev); ns.aftr_isp = !_drbd_may_sync_now(mdev); From 43de7c852b12170a17e397862210d2fc7eb7eaf2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Nov 2011 13:16:13 +0100 Subject: [PATCH 453/609] drbd: Fixes from the drbd-8.3 branch * drbd-8.3: drbd: O_SYNC gives EIO on ramdisks for some kernels (eg. RHEL6). 
drbd: send intermediate state change results to the peer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 30 ++++++++++++++++++++++++++---- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_state.c | 18 +++++++++--------- 5 files changed, 40 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ccf28e986d4..3177e3e6061 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1062,8 +1062,8 @@ extern void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); extern int _conn_send_state_req(struct drbd_tconn *, int vnr, enum drbd_packet cmd, union drbd_state, union drbd_state); -extern int _drbd_send_state(struct drbd_conf *mdev); -extern int drbd_send_state(struct drbd_conf *mdev); +extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); +extern int drbd_send_current_state(struct drbd_conf *mdev); extern int drbd_send_sync_param(struct drbd_conf *mdev); extern void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 822fb3d4235..e3186f2dad0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1148,10 +1148,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl } /** - * drbd_send_state() - Sends the drbd state to the peer + * drbd_send_current_state() - Sends the drbd state to the peer * @mdev: DRBD device. 
*/ -int drbd_send_state(struct drbd_conf *mdev) +int drbd_send_current_state(struct drbd_conf *mdev) { struct drbd_socket *sock; struct p_state *p; @@ -1164,6 +1164,29 @@ int drbd_send_state(struct drbd_conf *mdev) return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0); } +/** + * drbd_send_state() - After a state change, sends the new state to the peer + * @mdev: DRBD device. + * @state: the state to send, not necessarily the current state. + * + * Each state change queues an "after_state_ch" work, which will eventually + * send the resulting new state to the peer. If more state changes happen + * between queuing and processing of the after_state_ch work, we still + * want to send each intermediary state in the order it occurred. + */ +int drbd_send_state(struct drbd_conf *mdev, union drbd_state state) +{ + struct drbd_socket *sock; + struct p_state *p; + + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->state = cpu_to_be32(state.i); /* Within the send mutex */ + return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0); +} + int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) { struct drbd_socket *sock; @@ -1176,7 +1199,6 @@ int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drb p->mask = cpu_to_be32(mask.i); p->val = cpu_to_be32(val.i); return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0); - } int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) @@ -3003,7 +3025,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } if (magic != DRBD_MD_MAGIC_08) { - if (magic == DRBD_MD_MAGIC_07) + if (magic == DRBD_MD_MAGIC_07) dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); else dev_err(DEV, "Meta data magic not found. 
Did you \"drbdadm create-md\"?\n"); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c50c1753aa9..b342c95b724 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -654,7 +654,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) /* if this was forced, we should consider sync */ if (forced) drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } drbd_md_sync(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e823968f1cd..ed1fa3f7cc4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -825,7 +825,7 @@ int drbd_connected(struct drbd_conf *mdev) if (!err) err = drbd_send_uuids(mdev); if (!err) - err = drbd_send_state(mdev); + err = drbd_send_current_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. 
*/ @@ -3873,7 +3873,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) /* Nowadays only used when forcing a node into primary role and setting its disk to UpToDate with that */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index dffc6973e2b..58c77bbae44 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1198,7 +1198,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); } /* No point in queuing send_bitmap if we don't have a connection * anymore, so check also the _current_ state, not only the new state @@ -1263,14 +1263,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { drbd_send_sizes(mdev, 0, 0); /* to start sync... */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); } /* We want to pause/continue resync, tell peer. */ if (ns.conn >= C_CONNECTED && ((os.aftr_isp != ns.aftr_isp) || (os.user_isp != ns.user_isp))) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* In case one of the isp bits got set, suspend other devices. */ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && @@ -1280,10 +1280,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Make sure the peer gets informed about eventual state changes (ISP bits) while we were in WFReportParams. */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* We are in the progress to start a full sync... 
*/ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || @@ -1325,7 +1325,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, "ASSERT FAILED: disk is %s during detach\n", drbd_disk_str(mdev->state.disk)); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); drbd_rs_cancel_all(mdev); /* In case we want to get something to stable storage still, @@ -1353,7 +1353,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->rs_failed = 0; atomic_set(&mdev->rs_pending_cnt, 0); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. */ put_ldev(mdev); @@ -1361,7 +1361,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Notify peer that I had a local IO error, and did not detached.. */ if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && @@ -1379,7 +1379,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* sync target done with resync. Explicitly notify peer, even though * it should (at least for non-empty resyncs) already know itself. 
*/ if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk From 36baf6117b1deee37b9467224a0a14f1bb0863e2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Nov 2011 14:27:34 +0100 Subject: [PATCH 454/609] drbd: Fixed an obvious copy-n-paste mistake Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ed1fa3f7cc4..4cdf8a75d71 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4984,7 +4984,7 @@ static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) if (mdev->state.conn == C_AHEAD && atomic_read(&mdev->ap_in_flight) == 0 && - !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { + !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { mdev->start_resync_timer.expires = jiffies + HZ; add_timer(&mdev->start_resync_timer); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index bc2cfd6c600..e263a5f4aac 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1518,7 +1518,7 @@ int w_start_resync(struct drbd_work *w, int cancel) } drbd_start_resync(mdev, C_SYNC_SOURCE); - clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); + clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags); return 0; } From 6936fcb49ab369ad13267e292ec0e3490db91c4a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Nov 2011 18:45:36 +0100 Subject: [PATCH 455/609] drbd: Move the CREATE_BARRIER flag from connection to device That is necessary since the whole transfer log is per connection(tconn) 
and not per device(mdev). This bug caused list corruption on the worker list. When a barrier is queued for sending in the context of one device, another device did not see the CREATE_BARRIER bit, and queued the same object again -> list corruption. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 11 +++-------- drivers/block/drbd/drbd_req.c | 19 ++++++++++--------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3177e3e6061..e3913307e3d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -670,7 +670,6 @@ enum { /* flag bits per mdev */ enum { - CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ @@ -813,6 +812,7 @@ enum { CONN_WD_ST_CHG_OKAY, CONN_WD_ST_CHG_FAIL, CONN_DRY_RUN, /* Expect disconnect after resync handshake. 
*/ + CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e3186f2dad0..e1234065954 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -320,7 +320,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, mdev = b->w.mdev; nob = b->next; - if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + if (test_and_clear_bit(CREATE_BARRIER, &tconn->flags)) { _tl_add_barrier(tconn, b); if (nob) tconn->oldest_tle = nob; @@ -381,7 +381,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) if (b->w.cb == NULL) { b->w.cb = w_send_barrier; inc_ap_pending(b->w.mdev); - set_bit(CREATE_BARRIER, &b->w.mdev->flags); + set_bit(CREATE_BARRIER, &tconn->flags); } drbd_queue_work(&tconn->data.work, &b->w); @@ -448,10 +448,8 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) */ void tl_clear(struct drbd_tconn *tconn) { - struct drbd_conf *mdev; struct list_head *le, *tle; struct drbd_request *r; - int vnr; spin_lock_irq(&tconn->req_lock); @@ -470,10 +468,7 @@ void tl_clear(struct drbd_tconn *tconn) } /* ensure bit indicating barrier is required is clear */ - rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - clear_bit(CREATE_BARRIER, &mdev->flags); - rcu_read_unlock(); + clear_bit(CREATE_BARRIER, &tconn->flags); spin_unlock_irq(&tconn->req_lock); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 42bdf36cef1..ceb04a94aac 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -146,16 +146,17 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const static void queue_barrier(struct drbd_conf *mdev) { struct drbd_tl_epoch *b; + struct drbd_tconn *tconn = mdev->tconn; /* We are within the req_lock. 
Once we queued the barrier for sending, * we set the CREATE_BARRIER bit. It is cleared as soon as a new * barrier/epoch object is added. This is the only place this bit is * set. It indicates that the barrier for this epoch is already queued, * and no new epoch has been created yet. */ - if (test_bit(CREATE_BARRIER, &mdev->flags)) + if (test_bit(CREATE_BARRIER, &tconn->flags)) return; - b = mdev->tconn->newest_tle; + b = tconn->newest_tle; b->w.cb = w_send_barrier; b->w.mdev = mdev; /* inc_ap_pending done here, so we won't @@ -163,8 +164,8 @@ static void queue_barrier(struct drbd_conf *mdev) * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in tl_clear. */ inc_ap_pending(mdev); - drbd_queue_work(&mdev->tconn->data.work, &b->w); - set_bit(CREATE_BARRIER, &mdev->flags); + drbd_queue_work(&tconn->data.work, &b->w); + set_bit(CREATE_BARRIER, &tconn->flags); } static void _about_to_complete_local_write(struct drbd_conf *mdev, @@ -479,7 +480,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* see __drbd_make_request, * just after it grabs the req_lock */ - D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); + D_ASSERT(test_bit(CREATE_BARRIER, &mdev->tconn->flags) == 0); req->epoch = mdev->tconn->newest_tle->br_number; @@ -836,7 +837,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s * if we lost that race, we retry. 
*/ if (rw == WRITE && (remote || send_oos) && mdev->tconn->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->flags)) { + test_bit(CREATE_BARRIER, &mdev->tconn->flags)) { allocate_barrier: b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); if (!b) { @@ -893,7 +894,7 @@ allocate_barrier: } if (rw == WRITE && (remote || send_oos) && mdev->tconn->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->flags)) { + test_bit(CREATE_BARRIER, &mdev->tconn->flags)) { /* someone closed the current epoch * while we were grabbing the spinlock */ spin_unlock_irq(&mdev->tconn->req_lock); @@ -915,12 +916,12 @@ allocate_barrier: * make sure that, if this is a write request and it triggered a * barrier packet, this request is queued within the same spinlock. */ if ((remote || send_oos) && mdev->tconn->unused_spare_tle && - test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + test_and_clear_bit(CREATE_BARRIER, &mdev->tconn->flags)) { _tl_add_barrier(mdev->tconn, mdev->tconn->unused_spare_tle); mdev->tconn->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && - test_bit(CREATE_BARRIER, &mdev->flags))); + test_bit(CREATE_BARRIER, &mdev->tconn->flags))); } /* NOTE From 4b0007c0e8def19266c767f0410ce81eb39f55c7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Nov 2011 20:12:34 +0100 Subject: [PATCH 456/609] drbd: Move write_ordering from mdev to tconn This is necessary in order to prepare the move of the (receiver side) epoch list from the device (mdev) to the connection (tconn) objects. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 ++- drivers/block/drbd/drbd_main.c | 3 +- drivers/block/drbd/drbd_nl.c | 3 +- drivers/block/drbd/drbd_proc.c | 2 +- drivers/block/drbd/drbd_receiver.c | 65 ++++++++++++++++++------------ 5 files changed, 48 insertions(+), 31 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e3913307e3d..473694605da 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -858,6 +858,8 @@ struct drbd_tconn { /* is a resource from the config file */ void *int_dig_in; void *int_dig_vv; + enum write_ordering_e write_ordering; + struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -962,7 +964,7 @@ struct drbd_conf { struct drbd_epoch *current_epoch; spinlock_t epoch_lock; unsigned int epochs; - enum write_ordering_e write_ordering; + struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */ struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ struct list_head done_ee; /* need to send P_WRITE_ACK */ @@ -1539,7 +1541,7 @@ static inline void drbd_tcp_quickack(struct socket *sock) (char*)&val, sizeof(val)); } -void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); +void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e1234065954..7b1cad895d1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2130,7 +2130,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; @@ -2625,6 
+2624,8 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) if (!tl_init(tconn)) goto fail; + tconn->write_ordering = WO_bdev_flush; + tconn->cstate = C_STANDALONE; mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b342c95b724..b369dd112df 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1497,8 +1497,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) new_disk_conf = NULL; new_plan = NULL; - mdev->write_ordering = WO_bdev_flush; - drbd_bump_write_ordering(mdev, WO_bdev_flush); + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) set_bit(CRASHED_PRIMARY, &mdev->flags); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 7e68d99e9c9..1321192b377 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -272,7 +272,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) atomic_read(&mdev->unacked_cnt), atomic_read(&mdev->ap_bio_cnt), mdev->epochs, - write_ordering_chars[mdev->write_ordering] + write_ordering_chars[mdev->tconn->write_ordering] ); seq_printf(seq, " oos:%llu\n", Bit2KB((unsigned long long) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4cdf8a75d71..f6d1ff2e3ab 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1089,21 +1089,29 @@ static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) return err; } -static void drbd_flush(struct drbd_conf *mdev) +static void drbd_flush(struct drbd_tconn *tconn) { int rv; + struct drbd_conf *mdev; + int vnr; - if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { - rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, - NULL); - if (rv) { - dev_info(DEV, "local disk flush failed with status %d\n", rv); - /* 
would rather check on EOPNOTSUPP, but that is not reliable. - * don't try again for ANY return value != 0 - * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(mdev, WO_drain_io); + if (tconn->write_ordering >= WO_bdev_flush) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (get_ldev(mdev)) { + rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, + NULL); + put_ldev(mdev); + + if (rv) { + dev_info(DEV, "local disk flush failed with status %d\n", rv); + /* would rather check on EOPNOTSUPP, but that is not reliable. + * don't try again for ANY return value != 0 + * if (rv == -EOPNOTSUPP) */ + drbd_bump_write_ordering(tconn, WO_drain_io); + break; + } + } } - put_ldev(mdev); } } @@ -1182,32 +1190,39 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, /** * drbd_bump_write_ordering() - Fall back to an other write ordering method - * @mdev: DRBD device. + * @tconn: DRBD connection. * @wo: Write ordering method to try. */ -void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local) +void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo) { struct disk_conf *dc; + struct drbd_conf *mdev; enum write_ordering_e pwo; + int vnr; static char *write_ordering_str[] = { [WO_none] = "none", [WO_drain_io] = "drain", [WO_bdev_flush] = "flush", }; - pwo = mdev->write_ordering; + pwo = tconn->write_ordering; wo = min(pwo, wo); rcu_read_lock(); - dc = rcu_dereference(mdev->ldev->disk_conf); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (!get_ldev(mdev)) + continue; + dc = rcu_dereference(mdev->ldev->disk_conf); - if (wo == WO_bdev_flush && !dc->disk_flushes) - wo = WO_drain_io; - if (wo == WO_drain_io && !dc->disk_drain) - wo = WO_none; + if (wo == WO_bdev_flush && !dc->disk_flushes) + wo = WO_drain_io; + if (wo == WO_drain_io && !dc->disk_drain) + wo = WO_none; + put_ldev(mdev); + } rcu_read_unlock(); - mdev->write_ordering = wo; - if (pwo != mdev->write_ordering || wo 
== WO_bdev_flush) - dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); + tconn->write_ordering = wo; + if (pwo != tconn->write_ordering || wo == WO_bdev_flush) + conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]); } /** @@ -1341,7 +1356,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) * R_PRIMARY crashes now. * Therefore we must send the barrier_ack after the barrier request was * completed. */ - switch (mdev->write_ordering) { + switch (tconn->write_ordering) { case WO_none: if (rv == FE_RECYCLED) return 0; @@ -1358,7 +1373,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) case WO_bdev_flush: case WO_drain_io: drbd_wait_ee_list_empty(mdev, &mdev->active_ee); - drbd_flush(mdev); + drbd_flush(tconn); if (atomic_read(&mdev->current_epoch->epoch_size)) { epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); @@ -1374,7 +1389,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) return 0; default: - dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); + dev_err(DEV, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering); return -EIO; } From 1d2783d532207531ba8e3bfb016a4512dec97666 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Nov 2011 14:56:07 +0100 Subject: [PATCH 457/609] drbd: Prepare epochs per connection An epoch object needs a pointer to the mdev it was received for. This is necessary to be able to send the barrier ack packet for the same volume as the original barrier packet was assigned to. This prepares the next step, in which the (receiver side) epoch list is moved from the device (mdev) to the connection (tconn) object. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 473694605da..0db20cbb423 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -585,6 +585,7 @@ struct drbd_tl_epoch { }; struct drbd_epoch { + struct drbd_conf *mdev; struct list_head list; unsigned int barrier_nr; atomic_t epoch_size; /* increased on every request added. */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f6d1ff2e3ab..cc5e0b6a88e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1152,11 +1152,11 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { if (!(ev & EV_CLEANUP)) { spin_unlock(&mdev->epoch_lock); - drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); + drbd_send_b_ack(epoch->mdev, epoch->barrier_nr, epoch_size); spin_lock(&mdev->epoch_lock); } if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) - dec_unacked(mdev); + dec_unacked(epoch->mdev); if (mdev->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); @@ -1349,6 +1349,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) inc_unacked(mdev); mdev->current_epoch->barrier_nr = p->barrier; + mdev->current_epoch->mdev = mdev; rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); /* P_BARRIER_ACK may imply that the corresponding extent is dropped from From 12038a3a71ce6fabbcc2956cc8697fcbf729be57 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Nov 2011 19:18:00 +0100 Subject: [PATCH 458/609] drbd: Move list of epochs from mdev to tconn This is necessary since the transfer_log on the sending side is also per tconn.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 ++-- drivers/block/drbd/drbd_main.c | 25 +++++++------- drivers/block/drbd/drbd_proc.c | 2 +- drivers/block/drbd/drbd_receiver.c | 52 ++++++++++++++++-------------- 4 files changed, 42 insertions(+), 43 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0db20cbb423..94ec6bfb62a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -859,6 +859,9 @@ struct drbd_tconn { /* is a resource from the config file */ void *int_dig_in; void *int_dig_vv; + struct drbd_epoch *current_epoch; + spinlock_t epoch_lock; + unsigned int epochs; enum write_ordering_e write_ordering; struct drbd_thread receiver; @@ -962,9 +965,6 @@ struct drbd_conf { int open_cnt; u64 *p_uuid; - struct drbd_epoch *current_epoch; - spinlock_t epoch_lock; - unsigned int epochs; struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */ struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7b1cad895d1..8b99f4e28cc 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2082,7 +2082,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) spin_lock_init(&mdev->al_lock); spin_lock_init(&mdev->peer_seq_lock); - spin_lock_init(&mdev->epoch_lock); INIT_LIST_HEAD(&mdev->active_ee); INIT_LIST_HEAD(&mdev->sync_ee); @@ -2142,9 +2141,6 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", mdev->tconn->receiver.t_state); - /* no need to lock it, I'm the only thread alive */ - if (atomic_read(&mdev->current_epoch->epoch_size) != 0) - dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size)); mdev->al_writ_cnt = mdev->bm_writ_cnt = mdev->read_cnt = @@ -2377,7 +2373,6 @@ void drbd_minor_destroy(struct kref 
*kref) kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ - kfree(mdev->current_epoch); if (mdev->bitmap) /* should no longer be there. */ drbd_bm_cleanup(mdev); __free_page(mdev->md_io_page); @@ -2624,6 +2619,12 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) if (!tl_init(tconn)) goto fail; + tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); + if (!tconn->current_epoch) + goto fail; + INIT_LIST_HEAD(&tconn->current_epoch->list); + tconn->epochs = 1; + spin_lock_init(&tconn->epoch_lock); tconn->write_ordering = WO_bdev_flush; tconn->cstate = C_STANDALONE; @@ -2649,6 +2650,7 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) return tconn; fail: + kfree(tconn->current_epoch); tl_cleanup(tconn); free_cpumask_var(tconn->cpu_mask); drbd_free_socket(&tconn->meta); @@ -2663,6 +2665,10 @@ void conn_destroy(struct kref *kref) { struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref); + if (atomic_read(&tconn->current_epoch->epoch_size) != 0) + conn_err(tconn, "epoch_size:%d\n", atomic_read(&tconn->current_epoch->epoch_size)); + kfree(tconn->current_epoch); + idr_destroy(&tconn->volumes); free_cpumask_var(tconn->cpu_mask); @@ -2744,13 +2750,6 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; - mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); - if (!mdev->current_epoch) - goto out_no_epoch; - - INIT_LIST_HEAD(&mdev->current_epoch->list); - mdev->epochs = 1; - if (!idr_pre_get(&minors, GFP_KERNEL)) goto out_no_minor_idr; if (idr_get_new_above(&minors, mdev, minor, &minor_got)) @@ -2786,8 +2785,6 @@ out_idr_remove_minor: idr_remove(&minors, minor_got); synchronize_rcu(); out_no_minor_idr: - kfree(mdev->current_epoch); -out_no_epoch: drbd_bm_cleanup(mdev); out_no_bitmap: __free_page(mdev->md_io_page); diff --git a/drivers/block/drbd/drbd_proc.c 
b/drivers/block/drbd/drbd_proc.c index 1321192b377..64e0ddbf066 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -271,7 +271,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) atomic_read(&mdev->rs_pending_cnt), atomic_read(&mdev->unacked_cnt), atomic_read(&mdev->ap_bio_cnt), - mdev->epochs, + mdev->tconn->epochs, write_ordering_chars[mdev->tconn->write_ordering] ); seq_printf(seq, " oos:%llu\n", diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cc5e0b6a88e..0e8e256579e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1128,8 +1128,9 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, int epoch_size; struct drbd_epoch *next_epoch; enum finish_epoch rv = FE_STILL_LIVE; + struct drbd_tconn *tconn = mdev->tconn; - spin_lock(&mdev->epoch_lock); + spin_lock(&tconn->epoch_lock); do { next_epoch = NULL; @@ -1151,18 +1152,18 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, atomic_read(&epoch->active) == 0 && (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { if (!(ev & EV_CLEANUP)) { - spin_unlock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); drbd_send_b_ack(epoch->mdev, epoch->barrier_nr, epoch_size); - spin_lock(&mdev->epoch_lock); + spin_lock(&tconn->epoch_lock); } if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) dec_unacked(epoch->mdev); - if (mdev->current_epoch != epoch) { + if (tconn->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); list_del(&epoch->list); ev = EV_BECAME_LAST | (ev & EV_CLEANUP); - mdev->epochs--; + tconn->epochs--; kfree(epoch); if (rv == FE_STILL_LIVE) @@ -1183,7 +1184,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, epoch = next_epoch; } while (1); - spin_unlock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); return rv; } @@ -1348,9 +1349,9 @@ static int 
receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) inc_unacked(mdev); - mdev->current_epoch->barrier_nr = p->barrier; - mdev->current_epoch->mdev = mdev; - rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); + tconn->current_epoch->barrier_nr = p->barrier; + tconn->current_epoch->mdev = mdev; + rv = drbd_may_finish_epoch(mdev, tconn->current_epoch, EV_GOT_BARRIER_NR); /* P_BARRIER_ACK may imply that the corresponding extent is dropped from * the activity log, which means it would not be resynced in case the @@ -1376,13 +1377,13 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) drbd_wait_ee_list_empty(mdev, &mdev->active_ee); drbd_flush(tconn); - if (atomic_read(&mdev->current_epoch->epoch_size)) { + if (atomic_read(&tconn->current_epoch->epoch_size)) { epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); if (epoch) break; } - epoch = mdev->current_epoch; + epoch = tconn->current_epoch; wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0); D_ASSERT(atomic_read(&epoch->active) == 0); @@ -1398,16 +1399,16 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) atomic_set(&epoch->epoch_size, 0); atomic_set(&epoch->active, 0); - spin_lock(&mdev->epoch_lock); - if (atomic_read(&mdev->current_epoch->epoch_size)) { - list_add(&epoch->list, &mdev->current_epoch->list); - mdev->current_epoch = epoch; - mdev->epochs++; + spin_lock(&tconn->epoch_lock); + if (atomic_read(&tconn->current_epoch->epoch_size)) { + list_add(&epoch->list, &tconn->current_epoch->list); + tconn->current_epoch = epoch; + tconn->epochs++; } else { /* The current_epoch got recycled while we allocated this one... 
*/ kfree(epoch); } - spin_unlock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); return 0; } @@ -2103,7 +2104,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) err = wait_for_and_update_peer_seq(mdev, peer_seq); drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size); - atomic_inc(&mdev->current_epoch->epoch_size); + atomic_inc(&tconn->current_epoch->epoch_size); err2 = drbd_drain_block(mdev, pi->size); if (!err) err = err2; @@ -2131,11 +2132,11 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) if (dp_flags & DP_MAY_SET_IN_SYNC) peer_req->flags |= EE_MAY_SET_IN_SYNC; - spin_lock(&mdev->epoch_lock); - peer_req->epoch = mdev->current_epoch; + spin_lock(&tconn->epoch_lock); + peer_req->epoch = tconn->current_epoch; atomic_inc(&peer_req->epoch->epoch_size); atomic_inc(&peer_req->epoch->active); - spin_unlock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); rcu_read_lock(); tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; @@ -4359,6 +4360,11 @@ static void conn_disconnect(struct drbd_tconn *tconn) } rcu_read_unlock(); + if (!list_empty(&tconn->current_epoch->list)) + conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n"); + /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ + atomic_set(&tconn->current_epoch->epoch_size, 0); + conn_info(tconn, "Connection closed\n"); if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) @@ -4446,10 +4452,6 @@ static int drbd_disconnected(struct drbd_conf *mdev) D_ASSERT(list_empty(&mdev->sync_ee)); D_ASSERT(list_empty(&mdev->done_ee)); - /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ - atomic_set(&mdev->current_epoch->epoch_size, 0); - D_ASSERT(list_empty(&mdev->current_epoch->list)); - return 0; } From 1e9dd2912e3423ebf3c6ab7d824b13205310bfc9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Nov 2011 15:14:53 +0100 Subject: [PATCH 459/609] drbd: Switch 
drbd_may_finish_epoch() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0e8e256579e..6675e2c4e4f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -65,7 +65,7 @@ static int drbd_do_features(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(struct drbd_conf *mdev); -static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); +static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event); static int e_end_block(struct drbd_work *, int); @@ -1121,14 +1121,13 @@ static void drbd_flush(struct drbd_tconn *tconn) * @epoch: Epoch object. * @ev: Epoch event. */ -static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, +static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn, struct drbd_epoch *epoch, enum epoch_event ev) { int epoch_size; struct drbd_epoch *next_epoch; enum finish_epoch rv = FE_STILL_LIVE; - struct drbd_tconn *tconn = mdev->tconn; spin_lock(&tconn->epoch_lock); do { @@ -1174,7 +1173,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, /* atomic_set(&epoch->active, 0); is already zero */ if (rv == FE_STILL_LIVE) rv = FE_RECYCLED; - wake_up(&mdev->ee_wait); } } @@ -1351,7 +1349,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) tconn->current_epoch->barrier_nr = p->barrier; tconn->current_epoch->mdev = mdev; - rv = drbd_may_finish_epoch(mdev, tconn->current_epoch, EV_GOT_BARRIER_NR); + rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR); /* P_BARRIER_ACK may imply that the corresponding extent is dropped from * the activity log, which means it 
would not be resynced in case the @@ -1801,7 +1799,7 @@ static int e_end_block(struct drbd_work *w, int cancel) } else D_ASSERT(drbd_interval_empty(&peer_req->i)); - drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); + drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); return err; } @@ -2209,7 +2207,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) drbd_al_complete_io(mdev, &peer_req->i); out_interrupted: - drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP); + drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP); put_ldev(mdev); drbd_free_peer_req(mdev, peer_req); return err; From 77fede5137574813e415a4cf23038b6688ba2470 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Nov 2011 21:19:11 +0100 Subject: [PATCH 460/609] drbd: Fix the WO=drain implementation for multiple volumes Wait until IO is drained in all volumes. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6675e2c4e4f..be8f469bc8f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1334,6 +1334,22 @@ static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, wake_up(&mdev->misc_wait); } +void conn_wait_active_ee_empty(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); +} + static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; @@ -1372,7 +1388,7 @@ static int receive_Barrier(struct drbd_tconn 
*tconn, struct packet_info *pi) case WO_bdev_flush: case WO_drain_io: - drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + conn_wait_active_ee_empty(tconn); drbd_flush(tconn); if (atomic_read(&tconn->current_epoch->epoch_size)) { From 975b297947c85a1cb687d7561b6fc05d48160026 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 17 Nov 2011 10:11:47 +0100 Subject: [PATCH 461/609] drbd: fix potential spinlock deadlock drbd_try_clear_on_disk_bm() has a sanity check for the number of blocks left to be resynced (rs_left) in the current resync extent. If it detects a mismatch, it complains, and forces a disconnect using drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); Unfortunately, this may be called while holding the req_lock, and drbd_force_state() wants to acquire that lock itself. Deadlock. Don't force a disconnect, but fix up rs_left by recounting and reassigning the number of dirty blocks in that extent. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ade79be4e27..d69fb7d6181 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -573,16 +573,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, else ext->rs_failed += count; if (ext->rs_left < ext->rs_failed) { - dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " - "rs_failed=%d count=%d\n", + dev_warn(DEV, "BAD! 
sector=%llus enr=%u rs_left=%d " + "rs_failed=%d count=%d cstate=%s\n", (unsigned long long)sector, ext->lce.lc_number, ext->rs_left, - ext->rs_failed, count); - dump_stack(); + ext->rs_failed, count, + drbd_conn_str(mdev->state.conn)); - lc_put(mdev->resync, &ext->lce); - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); - return; + /* We don't expect to be able to clear more bits + * than have been set when we originally counted + * the set bits to cache that value in ext->rs_left. + * Whatever the reason (disconnect during resync, + * delayed local completion of an application write), + * try to fix it up by recounting here. */ + ext->rs_left = drbd_bm_e_weight(mdev, enr); } } else { /* Normally this element should be in the cache, From 03d63e1d1e705fdba4c0f6a3a119cc42504bb9ed Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Nov 2011 11:32:01 +0100 Subject: [PATCH 462/609] drbd: Remove leftover prototype Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 94ec6bfb62a..d397681fb7a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1063,8 +1063,6 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); -extern int _conn_send_state_req(struct drbd_tconn *, int vnr, enum drbd_packet cmd, - union drbd_state, union drbd_state); extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); extern int drbd_send_current_state(struct drbd_conf *mdev); extern int drbd_send_sync_param(struct drbd_conf *mdev); From 615e087fbd7483fafa28c8a1a4d1656251e0604d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 17 Nov 
2011 14:32:12 +0100 Subject: [PATCH 463/609] drbd: add missing rcu locks around recently introduced idr_for_each Recent commit drbd: Move write_ordering from mdev to tconn introduced a new idr_for_each loop over all volumes, but did not take necessary rcu locks or krefs. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index be8f469bc8f..9c888e5b648 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1096,22 +1096,30 @@ static void drbd_flush(struct drbd_tconn *tconn) int vnr; if (tconn->write_ordering >= WO_bdev_flush) { + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (get_ldev(mdev)) { - rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, - NULL); - put_ldev(mdev); + if (!get_ldev(mdev)) + continue; + kref_get(&mdev->kref); + rcu_read_unlock(); - if (rv) { - dev_info(DEV, "local disk flush failed with status %d\n", rv); - /* would rather check on EOPNOTSUPP, but that is not reliable. - * don't try again for ANY return value != 0 - * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(tconn, WO_drain_io); - break; - } + rv = blkdev_issue_flush(mdev->ldev->backing_bdev, + GFP_NOIO, NULL); + if (rv) { + dev_info(DEV, "local disk flush failed with status %d\n", rv); + /* would rather check on EOPNOTSUPP, but that is not reliable. 
+ * don't try again for ANY return value != 0 + * if (rv == -EOPNOTSUPP) */ + drbd_bump_write_ordering(tconn, WO_drain_io); } + put_ldev(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + + rcu_read_lock(); + if (rv) + break; } + rcu_read_unlock(); } } From d10b4ea32bf2b77a3d56a20992cd549978df7b38 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 30 Nov 2011 23:25:36 +0100 Subject: [PATCH 464/609] drbd: Get rid of "ASSERTION FAILED: tconn->current_epoch->list not empty" Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9c888e5b648..e546dd3fab8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4440,6 +4440,11 @@ static int drbd_disconnected(struct drbd_conf *mdev) drbd_finish_peer_reqs(mdev); + /* This second workqueue flush is necessary, since drbd_finish_peer_reqs() + might have issued a work again. The one before drbd_finish_peer_reqs() is + necessary to reclain net_ee in drbd_finish_peer_reqs(). */ + drbd_flush_workqueue(mdev); + kfree(mdev->p_uuid); mdev->p_uuid = NULL; From 380207d08e7c4d1b19c0323777278992b4fbf9d6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Nov 2011 12:31:20 +0100 Subject: [PATCH 465/609] drbd: Load balancing of read requests New config option for the disk section "read-balancing", with the values: prefer-local, prefer-remote, round-robin, when-congested-remote. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_req.c | 57 +++++++++++++++++++++++++++++- include/linux/drbd.h | 8 +++++ include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 1 + 6 files changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d397681fb7a..e2cccb40f5a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -698,6 +698,7 @@ enum { AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ B_RS_H_DONE, /* Before resync handler done (already executed) */ DISCARD_MY_DATA, /* discard_my_data flag per volume */ + READ_BALANCE_RR, }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e546dd3fab8..733b8bd663d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", + dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ceb04a94aac..98251e2a7fb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + if (!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + /* if it is still queued, we may not complete it here. 
* it will be canceled soon. */ if (!(req->rq_state & RQ_NET_QUEUED)) @@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; + + if (!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + _req_may_be_done_not_susp(req, m); /* else: done by HANDED_OVER_TO_NETWORK */ break; + goto_read_retry_local: + req->rq_state |= RQ_LOCAL_PENDING; + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + generic_make_request(req->private_bio); + break; + case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; @@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); + if (!IS_ERR_OR_NULL(req->private_bio)) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } _req_may_be_done_not_susp(req, m); break; }; @@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } +static bool remote_due_to_read_balancing(struct drbd_conf *mdev) +{ + enum drbd_read_balancing rbm; + struct backing_dev_info *bdi; + + if (mdev->state.pdsk < D_UP_TO_DATE) + return false; + + rcu_read_lock(); + rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; + rcu_read_unlock(); + + switch (rbm) { + case RB_CONGESTED_REMOTE: + bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + return bdi_read_congested(bdi); + case RB_LEAST_PENDING: + return atomic_read(&mdev->local_cnt) > + atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + case RB_ROUND_ROBIN: + return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); + case RB_PREFER_REMOTE: + return true; + case RB_PREFER_LOCAL: + default: + return false; + } +} + /* * 
complete_conflicting_writes - wait for any conflicting write requests * @@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); + } else if (remote_due_to_read_balancing(mdev)) { + /* Keep the private bio in case we need it + for a local retry */ + local = 0; } } remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; @@ -1017,7 +1072,7 @@ fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, &req->i); fail_and_free_req: - if (local) { + if (!IS_ERR_OR_NULL(req->private_bio)) { bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e9f754b66a..157ba3d74dc 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -102,6 +102,14 @@ enum drbd_on_congestion { OC_DISCONNECT, }; +enum drbd_read_balancing { + RB_PREFER_LOCAL, + RB_PREFER_REMOTE, + RB_ROUND_ROBIN, + RB_LEAST_PENDING, + RB_CONGESTED_REMOTE, +}; + /* KEEP the order, do not delete or insert. Only append. 
*/ enum drbd_ret_code { ERR_CODE_BASE = 100, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 2e6cefefe5e..826008f297f 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 6d0a24331ed..17ef66a5c11 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -161,6 +161,7 @@ #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_ON_CONGESTION_DEF OC_BLOCK +#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 From d60de03a6694302b691bdf858ede9cbdfb7112d6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 17 Nov 2011 10:12:31 +0100 Subject: [PATCH 466/609] drbd: Load balancing method: striping Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 13 +++++++++++-- include/linux/drbd.h | 6 ++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 98251e2a7fb..5b28de0c596 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -745,10 +745,11 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } -static bool remote_due_to_read_balancing(struct drbd_conf *mdev) +static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector) { enum drbd_read_balancing rbm; 
struct backing_dev_info *bdi; + int stripe_shift; if (mdev->state.pdsk < D_UP_TO_DATE) return false; @@ -764,6 +765,14 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev) case RB_LEAST_PENDING: return atomic_read(&mdev->local_cnt) > atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + case RB_32K_STRIPING: /* stripe_shift = 15 */ + case RB_64K_STRIPING: + case RB_128K_STRIPING: + case RB_256K_STRIPING: + case RB_512K_STRIPING: + case RB_1M_STRIPING: /* stripe_shift = 20 */ + stripe_shift = (rbm - RB_32K_STRIPING + 15); + return (sector >> (stripe_shift - 9)) & 1; case RB_ROUND_ROBIN: return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); case RB_PREFER_REMOTE: @@ -841,7 +850,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); - } else if (remote_due_to_read_balancing(mdev)) { + } else if (remote_due_to_read_balancing(mdev, sector)) { /* Keep the private bio in case we need it for a local retry */ local = 0; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 157ba3d74dc..1e86156c10f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -108,6 +108,12 @@ enum drbd_read_balancing { RB_ROUND_ROBIN, RB_LEAST_PENDING, RB_CONGESTED_REMOTE, + RB_32K_STRIPING, + RB_64K_STRIPING, + RB_128K_STRIPING, + RB_256K_STRIPING, + RB_512K_STRIPING, + RB_1M_STRIPING, }; /* KEEP the order, do not delete or insert. Only append. 
*/ From 57bcb6cf1ddb1593face20a13b140be19af9f6cd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Sat, 3 Dec 2011 11:18:56 +0100 Subject: [PATCH 467/609] drbd: Do not call generic_make_request() while holding req_lock Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 44 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5b28de0c596..b1957d6c8bf 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -31,6 +31,8 @@ #include "drbd_req.h" +static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size); + /* Update disk stats at start of I/O request */ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) { @@ -558,20 +560,21 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ if (req->rq_state & RQ_NET_PENDING) dec_ap_pending(mdev); + + p = !(req->rq_state & RQ_WRITE) && req->rq_state & RQ_NET_PENDING; + req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); - if (!(req->rq_state & RQ_WRITE) && - mdev->state.disk == D_UP_TO_DATE && - !IS_ERR_OR_NULL(req->private_bio)) - goto goto_read_retry_local; - /* if it is still queued, we may not complete it here. * it will be canceled soon. 
*/ - if (!(req->rq_state & RQ_NET_QUEUED)) + if (!(req->rq_state & RQ_NET_QUEUED)) { + if (p) + goto goto_read_retry_local; _req_may_be_done(req, m); /* Allowed while state.susp */ + } break; case WRITE_ACKED_BY_PEER_AND_SIS: @@ -631,9 +634,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_NET_DONE; - if (!(req->rq_state & RQ_WRITE) && - mdev->state.disk == D_UP_TO_DATE && - !IS_ERR_OR_NULL(req->private_bio)) + if (!(req->rq_state & RQ_WRITE)) goto goto_read_retry_local; _req_may_be_done_not_susp(req, m); @@ -641,9 +642,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; goto_read_retry_local: + if (!drbd_may_do_local_read(mdev, req->i.sector, req->i.size)) { + _req_may_be_done_not_susp(req, m); + break; + } + D_ASSERT(!(req->rq_state & RQ_LOCAL_PENDING)); req->rq_state |= RQ_LOCAL_PENDING; - req->private_bio->bi_bdev = mdev->ldev->backing_bdev; - generic_make_request(req->private_bio); + + get_ldev(mdev); + req->w.cb = w_restart_disk_io; + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; case FAIL_FROZEN_DISK_IO: @@ -706,11 +714,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); - if (!IS_ERR_OR_NULL(req->private_bio)) { - bio_put(req->private_bio); - req->private_bio = NULL; - put_ldev(mdev); - } _req_may_be_done_not_susp(req, m); break; }; @@ -840,7 +843,8 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s } else { /* READ || READA */ if (local) { - if (!drbd_may_do_local_read(mdev, sector, size)) { + if (!drbd_may_do_local_read(mdev, sector, size) || + remote_due_to_read_balancing(mdev, sector)) { /* we could kick the syncer to * sync this extent asap, wait for * it, then continue locally. 
@@ -850,10 +854,6 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); - } else if (remote_due_to_read_balancing(mdev, sector)) { - /* Keep the private bio in case we need it - for a local retry */ - local = 0; } } remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; @@ -1081,7 +1081,7 @@ fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, &req->i); fail_and_free_req: - if (!IS_ERR_OR_NULL(req->private_bio)) { + if (local) { bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); From b8853dbd8c6410d1faef2785e8ee4c990b068a77 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 13 Dec 2011 11:09:16 +0100 Subject: [PATCH 468/609] drbd: fix race between disconnect and receive_state If the asender thread, or request_timer_fn(), or some other part of the code, decided to drop the connection (because of timeout or other), but the receiver just now was processing a P_STATE packet, there was a chance that receive_state() would do a hard state change "re-establishing" an already failed connection without additional handshake. Log excerpt: Remote failed to finish a request within ko-count * timeout peer( Secondary -> Unknown ) conn( Connected -> Timeout ) pdsk( UpToDate -> DUnknown ) asender terminated ... peer( Unknown -> Secondary ) conn( Timeout -> Connected ) pdsk( DUnknown -> UpToDate ) peer_isp( 0 -> 1 ) ... Connection closed peer( Secondary -> Unknown ) conn( Connected -> Unconnected ) pdsk( UpToDate -> DUnknown ) peer_isp( 1 -> 0 ) receiver terminated Impact: while the connection state is erroneously "Connected", requests may be queued and even sent, which would never be acknowledged, and may have been missed by the cleanup. These requests would never be completed. The next drbd_suspend_io() will then lock up, waiting forever for these requests to complete. 
Fixed in several code paths: Make sure the connection state is NetworkFailure or worse before starting the cleanup in drbd_disconnect(). This should make sure the cleanup won't miss any requests. Disallow receive_state() to "upgrade" the connection state from an error state. This will make sure the "illegal" state transition won't happen. For all connection failure states, relax the safe-guard in sanitize_state() again to silently mask out those state changes (e.g. Timeout -> Connected becomes Timeout -> Timeout). Note by Philipp Reisner: The 3rd chunk described as "relax the safe-guard..." is not there in 8.4 as it is relaxed to the maximum in 8.4 already Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 733b8bd663d..1b6845a6ba8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3787,6 +3787,12 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) os = ns = drbd_read_state(mdev); spin_unlock_irq(&mdev->tconn->req_lock); + /* If some other part of the code (asender thread, timeout) + * already decided to close the connection again, + * we must not "re-establish" it here. */ + if (os.conn <= C_TEAR_DOWN) + return false; + /* If this is the "end of sync" confirmation, usually the peer disk * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits * set) resync started in PausedSyncT, or if the timing of pause-/ @@ -4368,6 +4374,13 @@ static void conn_disconnect(struct drbd_tconn *tconn) if (tconn->cstate == C_STANDALONE) return; + /* We are about to start the cleanup after connection loss. + * Make sure drbd_make_request knows about that. + * Usually we should be in some network failure state already, + * but just in case we are not, we fix it up here. 
+ */ + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); + /* asender does not clean up anything. it must not interfere, either */ drbd_thread_stop(&tconn->asender); drbd_free_sock(tconn); From 6ab9b1b60b1854bf5fe68ecd51cb9550c67801ec Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 13 Dec 2011 18:32:18 +0100 Subject: [PATCH 469/609] drbd: Do not send state packets while lower than C_CONNECTED cstate I.e. in C_WF_REPORT_PARAMS or in C_WF_CONNECTION. Sending may already work in these cstates, but the peer still expects the HandShake / ConnectionFeatures packet. Actually triggered by the test suite on kugel. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 58c77bbae44..507cfccc1be 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1325,7 +1325,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, "ASSERT FAILED: disk is %s during detach\n", drbd_disk_str(mdev->state.disk)); - drbd_send_state(mdev, ns); + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); + drbd_rs_cancel_all(mdev); /* In case we want to get something to stable storage still, @@ -1353,14 +1355,15 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->rs_failed = 0; atomic_set(&mdev->rs_pending_cnt, 0); - drbd_send_state(mdev, ns); + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. */ put_ldev(mdev); } /* Notify peer that I had a local IO error, and did not detached.. 
*/ - if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) + if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) drbd_send_state(mdev, ns); /* Disks got bigger while they were detached */ From e8cdc3433568a1741e95c1d4412a7fb9736d0eac Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 13 Dec 2011 11:36:57 +0100 Subject: [PATCH 470/609] drbd: Consider that read requests could be NEG_ACKEDed ap_in_flight only counts writes. NEG_ACKED is an action on a request that might be called for reads and writes. This bug was there forever, but it becomes much more relevant with the read balancing code. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b1957d6c8bf..cf3c10e23c5 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -628,7 +628,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? 
*/ if (req->rq_state & RQ_NET_PENDING) { dec_ap_pending(mdev); - atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + if (req->rq_state & RQ_WRITE) + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); } req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); From a01842ebeea315cbe4382703c065f671d8cd7622 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 13 Dec 2011 17:40:53 +0100 Subject: [PATCH 471/609] drbd: Log failures of connection state changes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 507cfccc1be..70aa9603e36 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1730,7 +1730,13 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ conn_err(tconn, "Could not kmalloc an acscw\n"); } -abort: + return rv; + abort: + if (flags & CS_VERBOSE) { + conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv)); + conn_err(tconn, " state = { cs:%s }\n", drbd_conn_str(oc)); + conn_err(tconn, "wanted = { cs:%s }\n", drbd_conn_str(val.conn)); + } return rv; } From 7da358625c056b631b0abf2fcf50dad5bcbc22d9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 19 Dec 2011 22:42:56 +0100 Subject: [PATCH 472/609] drbd: Restore late assigning of tconn->data.sock and meta.sock With commit from Mon Mar 28 16:33:12 2011 +0200 "drbd: drbd_connect(): Initialize struct drbd_socket before sending anything" tconn->data.sock and tconn->meta.sock get assigned early, in conn_connect. The early assigning can trigger an OOPS, because it may release the socket without acquiring the mutex protecting the socket. Another thread (worker) might use setsockopt() on the socket while it gets free()ed. Restored the (proven) 8.3 behavior of assigning these sockets after the two connections are established. 
Credits for reporting the issue are going to Arne Redlich. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 106 ++++++++++++++++------------- 1 file changed, 57 insertions(+), 49 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1b6845a6ba8..c8d3f38d539 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -842,7 +842,7 @@ int drbd_connected(struct drbd_conf *mdev) */ static int conn_connect(struct drbd_tconn *tconn) { - struct socket *sock, *msock; + struct drbd_socket sock, msock; struct drbd_conf *mdev; struct net_conf *nc; int vnr, timeout, try, h, ok; @@ -851,6 +851,15 @@ static int conn_connect(struct drbd_tconn *tconn) if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; + mutex_init(&sock.mutex); + sock.sbuf = tconn->data.sbuf; + sock.rbuf = tconn->data.rbuf; + sock.socket = NULL; + mutex_init(&msock.mutex); + msock.sbuf = tconn->meta.sbuf; + msock.rbuf = tconn->meta.rbuf; + msock.socket = NULL; + clear_bit(DISCARD_CONCURRENT, &tconn->flags); /* Assume that the peer only understands protocol 80 until we know better. 
*/ @@ -869,22 +878,26 @@ static int conn_connect(struct drbd_tconn *tconn) } if (s) { - if (!tconn->data.socket) { - tconn->data.socket = s; - send_first_packet(tconn, &tconn->data, P_INITIAL_DATA); - } else if (!tconn->meta.socket) { - tconn->meta.socket = s; - send_first_packet(tconn, &tconn->meta, P_INITIAL_META); + if (!sock.socket) { + sock.socket = s; + send_first_packet(tconn, &sock, P_INITIAL_DATA); + } else if (!msock.socket) { + msock.socket = s; + send_first_packet(tconn, &msock, P_INITIAL_META); } else { conn_err(tconn, "Logic error in conn_connect()\n"); goto out_release_sockets; } } - if (tconn->data.socket && tconn->meta.socket) { - schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10); - ok = drbd_socket_okay(&tconn->data.socket); - ok = drbd_socket_okay(&tconn->meta.socket) && ok; + if (sock.socket && msock.socket) { + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + timeout = nc->ping_timeo * HZ / 10; + rcu_read_unlock(); + schedule_timeout_interruptible(timeout); + ok = drbd_socket_okay(&sock.socket); + ok = drbd_socket_okay(&msock.socket) && ok; if (ok) break; } @@ -893,22 +906,22 @@ retry: s = drbd_wait_for_connect(tconn); if (s) { try = receive_first_packet(tconn, s); - drbd_socket_okay(&tconn->data.socket); - drbd_socket_okay(&tconn->meta.socket); + drbd_socket_okay(&sock.socket); + drbd_socket_okay(&msock.socket); switch (try) { case P_INITIAL_DATA: - if (tconn->data.socket) { + if (sock.socket) { conn_warn(tconn, "initial packet S crossed\n"); - sock_release(tconn->data.socket); + sock_release(sock.socket); } - tconn->data.socket = s; + sock.socket = s; break; case P_INITIAL_META: - if (tconn->meta.socket) { + if (msock.socket) { conn_warn(tconn, "initial packet M crossed\n"); - sock_release(tconn->meta.socket); + sock_release(msock.socket); } - tconn->meta.socket = s; + msock.socket = s; set_bit(DISCARD_CONCURRENT, &tconn->flags); break; default: @@ -928,49 +941,48 @@ retry: goto out_release_sockets; } - if 
(tconn->data.socket && &tconn->meta.socket) { - ok = drbd_socket_okay(&tconn->data.socket); - ok = drbd_socket_okay(&tconn->meta.socket) && ok; + if (sock.socket && &msock.socket) { + ok = drbd_socket_okay(&sock.socket); + ok = drbd_socket_okay(&msock.socket) && ok; if (ok) break; } } while (1); - sock = tconn->data.socket; - msock = tconn->meta.socket; + sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */ + msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */ - msock->sk->sk_reuse = 1; /* SO_REUSEADDR */ - sock->sk->sk_reuse = 1; /* SO_REUSEADDR */ + sock.socket->sk->sk_allocation = GFP_NOIO; + msock.socket->sk->sk_allocation = GFP_NOIO; - sock->sk->sk_allocation = GFP_NOIO; - msock->sk->sk_allocation = GFP_NOIO; - - sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; - msock->sk->sk_priority = TC_PRIO_INTERACTIVE; + sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; + msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; /* NOT YET ... - * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; - * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; + * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * first set it to the P_CONNECTION_FEATURES timeout, * which we set to 4x the configured ping_timeout. */ rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - sock->sk->sk_sndtimeo = - sock->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; + sock.socket->sk->sk_sndtimeo = + sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; - msock->sk->sk_rcvtimeo = nc->ping_int*HZ; + msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ; timeout = nc->timeout * HZ / 10; discard_my_data = nc->discard_my_data; rcu_read_unlock(); - msock->sk->sk_sndtimeo = timeout; + msock.socket->sk->sk_sndtimeo = timeout; /* we don't want delays. 
* we use TCP_CORK where appropriate, though */ - drbd_tcp_nodelay(sock); - drbd_tcp_nodelay(msock); + drbd_tcp_nodelay(sock.socket); + drbd_tcp_nodelay(msock.socket); + tconn->data.socket = sock.socket; + tconn->meta.socket = msock.socket; tconn->last_received = jiffies; h = drbd_do_features(tconn); @@ -989,8 +1001,8 @@ retry: } } - sock->sk->sk_sndtimeo = timeout; - sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + tconn->data.socket->sk->sk_sndtimeo = timeout; + tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; if (drbd_send_protocol(tconn) == -EOPNOTSUPP) return -1; @@ -1027,14 +1039,10 @@ retry: return h; out_release_sockets: - if (tconn->data.socket) { - sock_release(tconn->data.socket); - tconn->data.socket = NULL; - } - if (tconn->meta.socket) { - sock_release(tconn->meta.socket); - tconn->meta.socket = NULL; - } + if (sock.socket) + sock_release(sock.socket); + if (msock.socket) + sock_release(msock.socket); return -1; } From 69b6a3b159927d45092f64e07f40d5ecf93e11d8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 20 Dec 2011 11:49:58 +0100 Subject: [PATCH 473/609] drbd: restart loop in drbd_make_request() [prepare for Linux-3.2] With Linux-3.2 generic_make_request() will no longer loop over the request function until it finally returns 0. Move this loop into our drbd_make_request() function. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index cf3c10e23c5..e9a2f238b89 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -929,8 +929,8 @@ allocate_barrier: } if (drbd_suspended(mdev)) { - /* If we got suspended, use the retry mechanism of - generic_make_request() to restart processing of this + /* If we got suspended, use the retry mechanism in + drbd_make_request() to restart processing of this bio. 
In the next call to drbd_make_request we sleep in inc_ap_bio() */ ret = 1; @@ -1110,8 +1110,11 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) D_ASSERT(bio->bi_size > 0); D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); - inc_ap_bio(mdev); - return __drbd_make_request(mdev, bio, start_time); + do { + inc_ap_bio(mdev); + } while (__drbd_make_request(mdev, bio, start_time)); + + return 0; } /* This is called by bio_add_page(). From 1a3cde440615b0be304b3f92486c5c69ede4666b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 30 Dec 2011 00:28:23 +0100 Subject: [PATCH 474/609] drbd: drbd_bm_ALe_set_all(): Remove unused function Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 46 -------------------------------- drivers/block/drbd/drbd_int.h | 2 -- 2 files changed, 48 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index fcbc5e1ca50..e63dcd9aada 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1661,49 +1661,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) spin_unlock_irqrestore(&b->bm_lock, flags); return count; } - -/* Set all bits covered by the AL-extent al_enr. - * Returns number of bits changed. 
*/ -unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) -{ - struct drbd_bitmap *b = mdev->bitmap; - unsigned long *p_addr, *bm; - unsigned long weight; - unsigned long s, e; - int count, i, do_now; - if (!expect(b)) - return 0; - if (!expect(b->bm_pages)) - return 0; - - spin_lock_irq(&b->bm_lock); - if (BM_DONT_SET & b->bm_flags) - bm_print_lock_info(mdev); - weight = b->bm_set; - - s = al_enr * BM_WORDS_PER_AL_EXT; - e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words); - /* assert that s and e are on the same page */ - D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3) - == s >> (PAGE_SHIFT - LN2_BPL + 3)); - count = 0; - if (s < b->bm_words) { - i = do_now = e-s; - p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); - bm = p_addr + MLPP(s); - while (i--) { - count += hweight_long(*bm); - *bm = -1UL; - bm++; - } - bm_unmap(p_addr); - b->bm_set += do_now*BITS_PER_LONG - count; - if (e == b->bm_words) - b->bm_set -= bm_clear_surplus(b); - } else { - dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); - } - weight = b->bm_set - weight; - spin_unlock_irq(&b->bm_lock); - return weight; -} diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e2cccb40f5a..8001b7a2063 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1310,8 +1310,6 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); -extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, - unsigned long al_enr); extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); From 4d0fc3fdc3144b974888bb06efad69a0eb85719a Mon Sep 17 00:00:00 2001 From: Philipp 
Reisner Date: Fri, 20 Jan 2012 13:52:27 +0100 Subject: [PATCH 475/609] drbd: Fixed compat issue with disconnecting 8.4 from a primary 8.3 For compatibility reasons 8.4 has to send P_STATE_CHG_REQ (instead of P_CONN_ST_CHG_REQ) when disconnecting. In the receiving code path we missed to convert the old answer (P_STATE_CHG_REPLY) back to 8.4 logic. Therefore the CL_ST_CHG_SUCCESS or CL_ST_CHG_FAIL bit in the flags word of mdev got set, while the state code was waiting for the CONN_WD_ST_CHG_OKAY or CONN_WD_ST_CHG_FAIL bits in tconn. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 5 +++++ drivers/block/drbd/drbd_state.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8001b7a2063..e8461f8cb04 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -811,6 +811,7 @@ enum { SEND_PING, /* whether asender should send a ping asap */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ + CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */ CONN_WD_ST_CHG_OKAY, CONN_WD_ST_CHG_FAIL, CONN_DRY_RUN, /* Expect disconnect after resync handshake. 
*/ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c8d3f38d539..9b83f88c0e8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4827,6 +4827,11 @@ static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) if (!mdev) return -EIO; + if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) { + D_ASSERT(tconn->agreed_pro_version < 100); + return got_conn_RqSReply(tconn, pi); + } + if (retcode >= SS_SUCCESS) { set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); } else { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 70aa9603e36..05ed131a5a8 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1671,7 +1671,9 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v spin_unlock_irq(&tconn->req_lock); mutex_lock(&tconn->cstate_mutex); + set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); if (conn_send_state_req(tconn, mask, val)) { + clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); rv = SS_CW_FAILED_BY_PEER; /* if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); */ @@ -1679,6 +1681,7 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v } wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); + clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); abort: mutex_unlock(&tconn->cstate_mutex); From b17f33cb0abe61942083444fa6d218d7ca147773 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 8 Feb 2012 15:32:51 +0100 Subject: [PATCH 476/609] drbd: explicitly clear unused dp_flags in drbd_send_block We send left-over garbage from the previous packet in P_DATA_REPLY and P_RS_DATA_REPLY packets. That's bad behaviour. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8b99f4e28cc..c312a3bb936 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1882,6 +1882,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, p->sector = cpu_to_be64(peer_req->i.sector); p->block_id = peer_req->block_id; p->seq_num = 0; /* unused */ + p->dp_flags = 0; if (dgs) drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1); err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size); From 25b0d6c8c16b2bf6a0430ec9cf137297db4e85e5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 14 Feb 2012 12:12:35 +0100 Subject: [PATCH 477/609] drbd: Reinstate disabling AL updates with invalidate-remote Commit d0ef827e (drbd: switch configuration interface from connector to genetlink) introduced a regression by removing the ability to set all bits in the out of sync bitmap and to suspend updates to the activity log of a disconnected device via the invalidate-remote management call. Credits for reporting the issue are going to Arne Redlich. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 40 ++++++++++++++++++++++++++++++++- drivers/block/drbd/drbd_state.c | 5 +++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b369dd112df..c5aa08474a3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2410,9 +2410,47 @@ out: return 0; } +static int drbd_bmio_set_susp_al(struct drbd_conf *mdev) +{ + int rv; + + rv = drbd_bmio_set_n_write(mdev); + drbd_suspend_al(mdev); + return rv; +} + int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) { - return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S)); + int retcode; /* drbd_ret_code, drbd_state_rv */ + struct drbd_conf *mdev; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + + retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); + if (retcode < SS_SUCCESS) { + if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) { + /* The peer will get a resync upon connect anyways. + * Just make that into a full resync. 
*/ + retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); + if (retcode >= SS_SUCCESS) { + if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, + "set_n_write from invalidate_peer", + BM_LOCKED_SET_ALLOWED)) + retcode = ERR_IO_MD_DISK; + } + } else + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); + } + +out: + drbd_adm_finish(info, retcode); + return 0; } int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 05ed131a5a8..c0563a1aac4 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -628,6 +628,11 @@ is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc) if (oc == C_STANDALONE && nc != C_UNCONNECTED) return SS_NEED_CONNECTION; + /* When establishing a connection we need to go through WF_REPORT_PARAMS! + Necessary to do the right thing upon invalidate-remote on a disconnected resource */ + if (oc < C_WF_REPORT_PARAMS && nc >= C_CONNECTED) + return SS_NEED_CONNECTION; + /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */ if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING) return SS_IN_TRANSIENT_STATE; From 32db80f6f6326617ed40b2d157709226af4f062b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 22 Feb 2012 11:51:57 +0100 Subject: [PATCH 478/609] drbd: Consider the disk-timeout also for meta-data IO operations If the backing device is already frozen during attach, we failed to recognize that. The current disk-timeout code works on top of the drbd_request objects. During attach we do not allow IO and therefore never generate a drbd_request object but block before that in drbd_make_request(). This patch adds the timeout to all drbd_md_sync_page_io(). Before this patch we used to go from D_ATTACHING directly to D_DISKLESS if IO failed during attach. 
We can no longer do this since we have to stay in D_FAILED until all IO ops issued to the backing device returned. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 18 +++++++++-- drivers/block/drbd/drbd_bitmap.c | 4 +-- drivers/block/drbd/drbd_int.h | 3 +- drivers/block/drbd/drbd_state.c | 54 +++++++++++++++++--------------- 4 files changed, 48 insertions(+), 31 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index d69fb7d6181..b2355994a4d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -129,9 +129,21 @@ static bool md_io_allowed(struct drbd_conf *mdev) return ds >= D_NEGOTIATING || ds == D_ATTACHING; } -void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + unsigned int *done) { - wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); + long dt; + + rcu_read_lock(); + dt = rcu_dereference(bdev->disk_conf)->disk_timeout; + rcu_read_unlock(); + dt = dt * HZ / 10; + if (dt == 0) + dt = MAX_SCHEDULE_TIMEOUT; + + dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); + if (dt == 0) + dev_err(DEV, "meta-data IO operation timed out\n"); } static int _drbd_md_sync_page_io(struct drbd_conf *mdev, @@ -171,7 +183,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); + wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) err = mdev->md_io.error; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e63dcd9aada..ef6a79b46df 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1123,7 +1123,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned 
lazy_w * "in_flight reached zero, all done" event. */ if (!atomic_dec_and_test(&ctx->in_flight)) - wait_until_done_or_disk_failure(mdev, &ctx->done); + wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); else kref_put(&ctx->kref, &bm_aio_ctx_destroy); @@ -1242,7 +1242,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc } bm_page_io_async(ctx, idx, WRITE_SYNC); - wait_until_done_or_disk_failure(mdev, &ctx->done); + wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); if (ctx->error) drbd_chk_io_error(mdev, 1, true); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e8461f8cb04..b914f566ef3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1428,7 +1428,8 @@ extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); -extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c0563a1aac4..96fdbfa79a6 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1308,37 +1308,41 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* first half of local IO error, failure to attach, * or administrative detach */ if (os.disk != D_FAILED && ns.disk == D_FAILED) { - enum drbd_io_error_p eh; - int was_io_error; + enum drbd_io_error_p eh = EP_PASS_ON; + int was_io_error = 0; /* corresponding get_ldev was in __drbd_set_state, to serialize - * our cleanup here with the 
transition to D_DISKLESS, - * so it is safe to dreference ldev here. */ - rcu_read_lock(); - eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; - rcu_read_unlock(); - was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + * our cleanup here with the transition to D_DISKLESS. + * But is is still not save to dreference ldev here, since + * we might come from an failed Attach before ldev was set. */ + if (mdev->ldev) { + rcu_read_lock(); + eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + rcu_read_unlock(); - /* Immediately allow completion of all application IO, that waits - for completion from the local disk. */ - tl_abort_disk_io(mdev); + was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - /* current state still has to be D_FAILED, - * there is only one way out: to D_DISKLESS, - * and that may only happen after our put_ldev below. */ - if (mdev->state.disk != D_FAILED) - dev_err(DEV, - "ASSERT FAILED: disk is %s during detach\n", - drbd_disk_str(mdev->state.disk)); + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_abort_disk_io(mdev); - if (ns.conn >= C_CONNECTED) - drbd_send_state(mdev, ns); + /* current state still has to be D_FAILED, + * there is only one way out: to D_DISKLESS, + * and that may only happen after our put_ldev below. */ + if (mdev->state.disk != D_FAILED) + dev_err(DEV, + "ASSERT FAILED: disk is %s during detach\n", + drbd_disk_str(mdev->state.disk)); - drbd_rs_cancel_all(mdev); + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); - /* In case we want to get something to stable storage still, - * this may be the last chance. - * Following put_ldev may transition to D_DISKLESS. */ - drbd_md_sync(mdev); + drbd_rs_cancel_all(mdev); + + /* In case we want to get something to stable storage still, + * this may be the last chance. + * Following put_ldev may transition to D_DISKLESS. 
*/ + drbd_md_sync(mdev); + } put_ldev(mdev); if (was_io_error && eh == EP_CALL_HELPER) From 93f5afe9560b80e03a58ba5324ec71124df7655f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 23 Feb 2012 12:18:11 +0100 Subject: [PATCH 479/609] drbd: If disk timeout expires fail only the affected volume ...and not all volumes of the resource Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e9a2f238b89..4737ad1f94b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1153,6 +1153,7 @@ void request_timer_fn(unsigned long data) struct drbd_conf *mdev = (struct drbd_conf *) data; struct drbd_tconn *tconn = mdev->tconn; struct drbd_request *req; /* oldest request */ + struct block_device *bdev; struct list_head *le; struct net_conf *nc; unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ @@ -1163,6 +1164,7 @@ void request_timer_fn(unsigned long data) if (get_ldev(mdev)) { dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; + bdev = mdev->ldev->backing_bdev; put_ldev(mdev); } rcu_read_unlock(); @@ -1188,7 +1190,7 @@ void request_timer_fn(unsigned long data) _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); } } - if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (dt && req->rq_state & RQ_LOCAL_PENDING && req->private_bio->bi_bdev == bdev) { if (time_is_before_eq_jiffies(req->start_time + dt)) { dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); __drbd_chk_io_error(mdev, 1); From dd9b360475655838ff8719d8eedecdf4c4cf80d1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 23 Feb 2012 12:52:31 +0100 Subject: [PATCH 480/609] drbd: Fix module refcount leak in drbd_accept() drbd_accept was modelled after kernel_accept with drbd commit 53eb779 in July 2008. 
Only, kernel_accept was then broken, and only fixed later with kernel commit 1b08534e in Dec 2008: net: Fix module refcount leak in kernel_accept() Impact: protocol families provided as modules, e.g. ipv6 or ib_sdp, would soon have their reference count become negative, preventing them from being unloaded (likely), or worse, hit zero without actually being unused, allowing them to be unloaded while still in use (unlikely, but if triggered, causing a kernel crash). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9b83f88c0e8..caf3c3cd2d2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -487,6 +487,7 @@ static int drbd_accept(const char **what, struct socket *sock, struct socket **n goto out; } (*newsock)->ops = sock->ops; + __module_get((*newsock)->ops->owner); out: return err; From 72585d2428fa3a0daab02ebad1f41e5ef517dbaa Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 23 Feb 2012 12:56:26 +0100 Subject: [PATCH 481/609] drbd: add missing part_round_stats to _drbd_start_io_acct Without this, iostat frequently sees bogus svctime and >= 100% "utilization". 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 4737ad1f94b..bfd14bc9693 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -39,6 +39,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req const int rw = bio_data_dir(bio); int cpu; cpu = part_stat_lock(); + part_round_stats(cpu, &mdev->vdisk->part0); part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); (void) cpu; /* The macro invocations above want the cpu argument, I do not like From 38a05c16b8fc855db2294eec36fde2c665b14e8f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 7 Mar 2012 13:41:52 +0100 Subject: [PATCH 482/609] drbd: Consider that bio->bi_bdev might be modified below DRBD Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index bfd14bc9693..3e4dc070d3f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1154,7 +1154,6 @@ void request_timer_fn(unsigned long data) struct drbd_conf *mdev = (struct drbd_conf *) data; struct drbd_tconn *tconn = mdev->tconn; struct drbd_request *req; /* oldest request */ - struct block_device *bdev; struct list_head *le; struct net_conf *nc; unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ @@ -1165,7 +1164,6 @@ void request_timer_fn(unsigned long data) if (get_ldev(mdev)) { dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; - bdev = mdev->ldev->backing_bdev; put_ldev(mdev); } rcu_read_unlock(); @@ -1191,7 +1189,7 @@ void request_timer_fn(unsigned long data) _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, 
NULL); } } - if (dt && req->rq_state & RQ_LOCAL_PENDING && req->private_bio->bi_bdev == bdev) { + if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev) { if (time_is_before_eq_jiffies(req->start_time + dt)) { dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); __drbd_chk_io_error(mdev, 1); From 81f448629aa25051c47b4d5b81702da8cbe922c3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 15:09:44 +0200 Subject: [PATCH 483/609] drbd: Fix a potential race that could case data inconsistency When we have a write request and a state change C_WF_BITMAP_S -> C_SYNC_SOURCE at the same time, and it happens that the line remote = remote && drbd_should_do_remote(s); stills sees C_WF_BITMAP_S, and send_oos = rw == WRITE && drbd_should_send_oos(s); already sees C_SYNC_SOURCE both are 0. This causes the write to not be mirrored, but marked as out-of-sync on the Sync_Source node. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3e4dc070d3f..c3f99bde0e1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -822,6 +822,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s int local, remote, send_oos = 0; int err; int ret = 0; + union drbd_dev_state s; /* allocate outside of all locks; */ req = drbd_req_new(mdev, bio); @@ -884,8 +885,9 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s drbd_al_begin_io(mdev, &req->i); } - remote = remote && drbd_should_do_remote(mdev->state); - send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state); + s = mdev->state; + remote = remote && drbd_should_do_remote(s); + send_oos = rw == WRITE && drbd_should_send_out_of_sync(s); D_ASSERT(!(remote && send_oos)); if (!(local || remote) && !drbd_suspended(mdev)) { From 
d93f63028f3bc07bbcf6fe25d8d7d8a50f441bd9 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 15:49:13 +0200 Subject: [PATCH 484/609] drbd: Fix a potential write ordering issue on SyncTarget nodes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index caf3c3cd2d2..8a7f61ba74a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1911,6 +1911,33 @@ static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) } } +static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) +{ + return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); +} + +/* maybe change sync_ee into interval trees as well? */ +static bool overlaping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) +{ + struct drbd_peer_request *rs_req; + bool rv = 0; + + spin_lock_irq(&mdev->tconn->req_lock); + list_for_each_entry(rs_req, &mdev->sync_ee, w.list) { + if (overlaps(peer_req->i.sector, peer_req->i.size, + rs_req->i.sector, rs_req->i.size)) { + rv = 1; + break; + } + } + spin_unlock_irq(&mdev->tconn->req_lock); + + if (rv) + dev_warn(DEV, "WARN: Avoiding concurrent data/resync write to single sector.\n"); + + return rv; +} + /* Called from receive_Data. * Synchronize packets on sock with packets on msock. 
* @@ -2192,6 +2219,9 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) list_add(&peer_req->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->tconn->req_lock); + if (mdev->state.conn == C_SYNC_TARGET) + wait_event(mdev->ee_wait, !overlaping_resync_write(mdev, peer_req)); + if (mdev->tconn->agreed_pro_version < 100) { rcu_read_lock(); switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) { From e8744f5aca47cac36af1e2b5eb55f7131bf198df Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 15:57:00 +0200 Subject: [PATCH 485/609] drbd: Fixed detach Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 96fdbfa79a6..8c5cedfb4d9 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -199,7 +199,7 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || - (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || + (os.disk != D_FAILED && ns.disk == D_FAILED))) || (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) || (os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS); From 4b8514ee288dede5013d23c3d6a285052d8392ab Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:12:49 +0200 Subject: [PATCH 486/609] drbd: fix potential data corruption and protocol error Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c312a3bb936..2aee581c03e 100644 --- 
a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1729,7 +1729,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) struct bio_vec *bvec; int i; /* hint all but last page with MSG_MORE */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment(bvec, bio, i) { int err; err = _drbd_no_send_page(mdev, bvec->bv_page, @@ -1746,7 +1746,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) struct bio_vec *bvec; int i; /* hint all but last page with MSG_MORE */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment(bvec, bio, i) { int err; err = _drbd_send_page(mdev, bvec->bv_page, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e263a5f4aac..3adbc5a2970 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -304,7 +304,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * sg_init_table(&sg, 1); crypto_hash_init(&desc); - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment(bvec, bio, i) { sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); crypto_hash_update(&desc, &sg, sg.length); } From 5cdb0bf322b3a5ed91ff0d3be28c6af0f6a2b6bd Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:21:25 +0200 Subject: [PATCH 487/609] drbd: remove now unused seq_num member from struct drbd_request Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b914f566ef3..534215885cb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -572,7 +572,6 @@ struct drbd_request { struct list_head tl_requests; /* ring list in the transfer log */ struct bio *master_bio; /* master bio pointer */ unsigned long rq_state; /* see comments above 
_req_mod() */ - int seq_num; unsigned long start_time; }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2aee581c03e..4a59b8a8ba7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1808,7 +1808,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) return -EIO; p->sector = cpu_to_be64(req->i.sector); p->block_id = (unsigned long)req; - p->seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq)); + p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T) From a6a7d4f0c12d3e04d96cca58b95bb0358101a078 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:21:37 +0200 Subject: [PATCH 488/609] drbd: missing wakeup after drbd_rs_del_all Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index b2355994a4d..9eae2894431 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -1097,6 +1097,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) put_ldev(mdev); } spin_unlock_irq(&mdev->al_lock); + wake_up(&mdev->al_wait); return 0; } From 9dab3842b5bfffc20135ea56f147e5fe2857be40 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:22:00 +0200 Subject: [PATCH 489/609] drbd: fix memleak in error path in bm_rw and drbd_bm_write_range Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ef6a79b46df..e343817bc69 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1075,8 +1075,8 @@ 
static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); - err = -ENODEV; - goto out; + kfree(ctx); + return -ENODEV; } if (!ctx->flags) @@ -1156,7 +1156,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); -out: kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1237,8 +1236,8 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); - err = -ENODEV; - goto out; + kfree(ctx); + return -ENODEV; } bm_page_io_async(ctx, idx, WRITE_SYNC); @@ -1251,7 +1250,6 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc mdev->bm_writ_cnt++; err = atomic_read(&ctx->in_flight) ? 
-EIO : ctx->error; - out: kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } From e44d71f36c4e19ee08c70d466cdcf6ae56fbbefd Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:22:06 +0200 Subject: [PATCH 490/609] drbd: remove some very outdated comments Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 534215885cb..c5c44d974ee 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -727,13 +727,6 @@ enum bm_flag { /* clear is not expected while bitmap is locked for bulk operation */ }; - -/* TODO sort members for performance - * MAYBE group them further */ - -/* THINK maybe we actually want to use the default "event/%s" worker threads - * or similar in linux 2.6, which uses per cpu data and threads. - */ struct drbd_work_queue { struct list_head q; struct semaphore s; /* producers up it, worker down()s it */ From 0642d5f8e0c974705932a4c3adf02192b9528d7b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:23:02 +0200 Subject: [PATCH 491/609] drbd: remove unused static helper function Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c5c44d974ee..caeb01eef44 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1594,18 +1594,6 @@ static inline struct page *page_chain_next(struct page *page) #define page_chain_for_each_safe(page, n) \ for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline int drbd_bio_has_active_page(struct bio *bio) -{ - struct bio_vec *bvec; - int i; - - __bio_for_each_segment(bvec, bio, i, 0) { - if (page_count(bvec->bv_page) > 1) - return 1; - } - - return 0; -} static inline int 
drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req) { From f9916d61a40e7ad43c2a20444894f85c45512f91 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:25:41 +0200 Subject: [PATCH 492/609] drbd: don't pretend that barrier_nr == 0 was special Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4a59b8a8ba7..a0045ac8804 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -250,9 +250,7 @@ void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new) new->n_writes = 0; newest_before = tconn->newest_tle; - /* never send a barrier number == 0, because that is special-cased - * when using TCQ for our write ordering code */ - new->br_number = (newest_before->br_number+1) ?: 1; + new->br_number = newest_before->br_number+1; if (tconn->newest_tle != new) { tconn->newest_tle->next = new; tconn->newest_tle = new; From 2312f0b3c5ab794fbac9e9bebe90c784c9d449c5 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 24 Nov 2011 10:36:25 +0100 Subject: [PATCH 493/609] drbd: fix potential deadlock during "restart" of conflicting writes w_restart_write(), run from worker context, calls __drbd_make_request() and further drbd_al_begin_io(, delegate=true), which then potentially deadlocks. The previous patch moved a BUG_ON to expose such call paths, which would now be triggered. Also, if we call __drbd_make_request() from resource worker context, like w_restart_write() did, and that should block for whatever reason (!drbd_state_is_stable(), resource suspended, ...), we potentially deadlock the whole resource, as the worker is needed for state changes and other things. Create a dedicated retry workqueue for this instead. 
Also make sure that inc_ap_bio()/dec_ap_bio() are properly paired, even if do_retry() needs to retry itself, in case __drbd_make_request() returns != 0. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 79 ++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_receiver.c | 32 ++---------- drivers/block/drbd/drbd_req.c | 17 +++++-- drivers/block/drbd/drbd_req.h | 3 ++ 4 files changed, 99 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a0045ac8804..5529d392e5d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2383,6 +2383,73 @@ void drbd_minor_destroy(struct kref *kref) kref_put(&tconn->kref, &conn_destroy); } +/* One global retry thread, if we need to push back some bio and have it + * reinserted through our make request function. + */ +static struct retry_worker { + struct workqueue_struct *wq; + struct work_struct worker; + + spinlock_t lock; + struct list_head writes; +} retry; + +static void do_retry(struct work_struct *ws) +{ + struct retry_worker *retry = container_of(ws, struct retry_worker, worker); + LIST_HEAD(writes); + struct drbd_request *req, *tmp; + + spin_lock_irq(&retry->lock); + list_splice_init(&retry->writes, &writes); + spin_unlock_irq(&retry->lock); + + list_for_each_entry_safe(req, tmp, &writes, tl_requests) { + struct drbd_conf *mdev = req->w.mdev; + struct bio *bio = req->master_bio; + unsigned long start_time = req->start_time; + + /* We have exclusive access to this request object. + * If it had not been RQ_POSTPONED, the code path which queued + * it here would have completed and freed it already. + */ + mempool_free(req, drbd_request_mempool); + + /* A single suspended or otherwise blocking device may stall + * all others as well. Fortunately, this code path is to + * recover from a situation that "should not happen": + * concurrent writes in multi-primary setup. 
+ * In a "normal" lifecycle, this workqueue is supposed to be + * destroyed without ever doing anything. + * If it turns out to be an issue anyways, we can do per + * resource (replication group) or per device (minor) retry + * workqueues instead. + */ + + /* We are not just doing generic_make_request(), + * as we want to keep the start_time information. */ + do { + inc_ap_bio(mdev); + } while(__drbd_make_request(mdev, bio, start_time)); + } +} + +void drbd_restart_write(struct drbd_request *req) +{ + unsigned long flags; + spin_lock_irqsave(&retry.lock, flags); + list_move_tail(&req->tl_requests, &retry.writes); + spin_unlock_irqrestore(&retry.lock, flags); + + /* Drop the extra reference that would otherwise + * have been dropped by complete_master_bio. + * do_retry() needs to grab a new one. */ + dec_ap_bio(req->w.mdev); + + queue_work(retry.wq, &retry.worker); +} + + static void drbd_cleanup(void) { unsigned int i; @@ -2402,6 +2469,9 @@ static void drbd_cleanup(void) if (drbd_proc) remove_proc_entry("drbd", NULL); + if (retry.wq) + destroy_workqueue(retry.wq); + drbd_genl_unregister(); idr_for_each_entry(&minors, mdev, i) { @@ -2851,6 +2921,15 @@ int __init drbd_init(void) rwlock_init(&global_state_lock); INIT_LIST_HEAD(&drbd_tconns); + retry.wq = create_singlethread_workqueue("drbd-reissue"); + if (!retry.wq) { + printk(KERN_ERR "drbd: unable to create retry workqueue\n"); + goto fail; + } + INIT_WORK(&retry.worker, do_retry); + spin_lock_init(&retry.lock); + INIT_LIST_HEAD(&retry.writes); + printk(KERN_INFO "drbd: initialized. 
" "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8a7f61ba74a..b159ad15abe 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1748,30 +1748,6 @@ static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi) return err; } -static int w_restart_write(struct drbd_work *w, int cancel) -{ - struct drbd_request *req = container_of(w, struct drbd_request, w); - struct drbd_conf *mdev = w->mdev; - struct bio *bio; - unsigned long start_time; - unsigned long flags; - - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - if (!expect(req->rq_state & RQ_POSTPONED)) { - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - return -EIO; - } - bio = req->master_bio; - start_time = req->start_time; - /* Postponed requests will not have their master_bio completed! */ - __req_mod(req, DISCARD_WRITE, NULL); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - while (__drbd_make_request(mdev, bio, start_time)) - /* retry */ ; - return 0; -} - static void restart_conflicting_writes(struct drbd_conf *mdev, sector_t sector, int size) { @@ -1785,11 +1761,9 @@ static void restart_conflicting_writes(struct drbd_conf *mdev, if (req->rq_state & RQ_LOCAL_PENDING || !(req->rq_state & RQ_POSTPONED)) continue; - if (expect(list_empty(&req->w.list))) { - req->w.mdev = mdev; - req->w.cb = w_restart_write; - drbd_queue_work(&mdev->tconn->data.work, &req->w); - } + /* as it is RQ_POSTPONED, this will cause it to + * be queued on the retry workqueue. 
*/ + __req_mod(req, DISCARD_WRITE, NULL); } } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c3f99bde0e1..5f4436c3abb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -104,7 +104,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const * and never sent), it should still be "empty" as * initialized in drbd_req_new(), so we can list_del() it * here unconditionally */ - list_del(&req->tl_requests); + list_del_init(&req->tl_requests); /* if it was a write, we may have to set the corresponding * bit(s) out-of-sync first. If it had a local part, we need to @@ -143,7 +143,10 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const } } - drbd_req_free(req); + if (s & RQ_POSTPONED) + drbd_restart_write(req); + else + drbd_req_free(req); } static void queue_barrier(struct drbd_conf *mdev) @@ -289,8 +292,16 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) if (!(s & RQ_POSTPONED)) { m->error = ok ? 0 : (error ?: -EIO); m->bio = req->master_bio; + req->master_bio = NULL; + } else { + /* Assert that this will be _req_is_done() + * with this very invokation. */ + /* FIXME: + * what about (RQ_LOCAL_PENDING | RQ_LOCAL_ABORTED)? 
+ */ + D_ASSERT(!(s & RQ_LOCAL_PENDING)); + D_ASSERT(s & RQ_NET_DONE); } - req->master_bio = NULL; } if (s & RQ_LOCAL_PENDING) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 68f54050b7c..492f81d3765 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -268,6 +268,9 @@ extern void request_timer_fn(unsigned long data); extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); +/* this is in drbd_main.c */ +extern void drbd_restart_write(struct drbd_request *req); + /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) From 27a434fe4006efa597a392d3575e3a5e9017994f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:44:59 +0200 Subject: [PATCH 494/609] drbd: make OOS_HANDED_TO_NETWORK its own case Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5f4436c3abb..d6198b50ea7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -523,10 +523,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->tconn->data.work, &req->w); break; - case OOS_HANDED_TO_NETWORK: - /* actually the same */ case SEND_CANCELED: - /* treat it the same */ case SEND_FAILED: /* real cleanup will be done from tl_clear. 
just update flags * so it is no longer marked as on the worker queue */ @@ -556,11 +553,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } req->rq_state &= ~RQ_NET_QUEUED; req->rq_state |= RQ_NET_SENT; - /* because _drbd_send_zc_bio could sleep, and may want to - * dereference the bio even after the "WRITE_ACKED_BY_PEER" and - * "COMPLETED_OK" events came in, once we return from - * _drbd_send_zc_bio (drbd_send_dblock), we have to check - * whether it is done already, and end it. */ + _req_may_be_done_not_susp(req, m); + break; + + case OOS_HANDED_TO_NETWORK: + /* Was not set PENDING, no longer QUEUED, so is now DONE + * as far as this connection is concerned. */ + req->rq_state &= ~RQ_NET_QUEUED; + req->rq_state |= RQ_NET_DONE; _req_may_be_done_not_susp(req, m); break; From ea9d6729bd0f90126e910c31cf64ecb5074f43a7 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:46:39 +0200 Subject: [PATCH 495/609] drbd: fix READ_RETRY_REMOTE_CANCELED to not complete if device is suspended Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d6198b50ea7..771f29c8043 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -523,6 +523,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->tconn->data.work, &req->w); break; + case READ_RETRY_REMOTE_CANCELED: case SEND_CANCELED: case SEND_FAILED: /* real cleanup will be done from tl_clear. 
just update flags @@ -564,9 +565,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done_not_susp(req, m); break; - case READ_RETRY_REMOTE_CANCELED: - req->rq_state &= ~RQ_NET_QUEUED; - /* fall through, in case we raced with drbd_disconnect */ case CONNECTION_LOST_WHILE_PENDING: /* transfer log cleanup after connection loss */ /* assert something? */ From 0afd569a40323b8b50bcf553cf9c0fbca9c9e229 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:51:11 +0200 Subject: [PATCH 496/609] drbd: fix WRITE_ACKED_BY_PEER_AND_SIS to not set RQ_NET_DONE Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 771f29c8043..938a57bf73c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -587,28 +587,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } break; - case WRITE_ACKED_BY_PEER_AND_SIS: - req->rq_state |= RQ_NET_SIS; case DISCARD_WRITE: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered by the activity log. */ req->rq_state |= RQ_NET_DONE; /* fall through */ + case WRITE_ACKED_BY_PEER_AND_SIS: case WRITE_ACKED_BY_PEER: + if (what == WRITE_ACKED_BY_PEER_AND_SIS) + req->rq_state |= RQ_NET_SIS; D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); /* protocol C; successfully written on peer. - * Nothing to do here. + * Nothing more to do here. * We want to keep the tl in place for all protocols, to cater - * for volatile write-back caches on lower level devices. - * - * A barrier request is expected to have forced all prior - * requests onto stable storage, so completion of a barrier - * request could set NET_DONE right here, and not wait for the - * P_BARRIER_ACK, but that is an unnecessary optimization. 
*/ + * for volatile write-back caches on lower level devices. */ goto ack_common; - /* this makes it effectively the same as for: */ case RECV_ACKED_BY_PEER: D_ASSERT(req->rq_state & RQ_EXP_RECEIVE_ACK); /* protocol B; pretends to be successfully written on peer. From 8d6cdd784818fd8812f185cd6768d09e43e6f71f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:55:46 +0200 Subject: [PATCH 497/609] drbd: conflicting writes: make wake_up of waiting peer_requests explicit Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 938a57bf73c..c0326f54d3a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -212,6 +212,16 @@ static void drbd_remove_request_interval(struct rb_root *root, wake_up(&mdev->misc_wait); } +static void maybe_wakeup_conflicting_requests(struct drbd_request *req) +{ + const unsigned long s = req->rq_state; + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) + return; + if (req->i.waiting) + /* Retry all conflicting peer requests. */ + wake_up(&req->w.mdev->misc_wait); +} + /* Helper for __req_mod(). * Set m->bio to the master bio, if it is fit to be completed, * or leave it alone (it is initialized to NULL in __req_mod), @@ -235,10 +245,6 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) */ if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) return; - if (req->i.waiting) { - /* Retry all conflicting peer requests. 
*/ - wake_up(&mdev->misc_wait); - } if (s & RQ_NET_QUEUED) return; if (s & RQ_NET_PENDING) @@ -388,6 +394,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); req->rq_state &= ~RQ_LOCAL_PENDING; + maybe_wakeup_conflicting_requests(req); _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; @@ -405,6 +412,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_LOCAL_PENDING; __drbd_chk_io_error(mdev, false); + maybe_wakeup_conflicting_requests(req); _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; @@ -615,6 +623,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); req->rq_state &= ~RQ_NET_PENDING; + maybe_wakeup_conflicting_requests(req); _req_may_be_done_not_susp(req, m); break; @@ -626,6 +635,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, */ D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_POSTPONED; + maybe_wakeup_conflicting_requests(req); _req_may_be_done_not_susp(req, m); break; @@ -643,6 +653,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (!(req->rq_state & RQ_WRITE)) goto goto_read_retry_local; + maybe_wakeup_conflicting_requests(req); _req_may_be_done_not_susp(req, m); /* else: done by HANDED_OVER_TO_NETWORK */ break; From 6870ca6d463e195cf13589e90f8281648b389909 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 17:02:45 +0200 Subject: [PATCH 498/609] drbd: factor out master_bio completion and drbd_request destruction paths In preparation for multiple connections and reference counting, separate the code paths for completion of the master bio and destruction of the request object. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 94 +++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 36 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c0326f54d3a..f6a785b239a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -222,13 +222,44 @@ static void maybe_wakeup_conflicting_requests(struct drbd_request *req) wake_up(&req->w.mdev->misc_wait); } +static +void req_may_be_done(struct drbd_request *req) +{ + const unsigned long s = req->rq_state; + struct drbd_conf *mdev = req->w.mdev; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; + + /* req->master_bio still present means: Not yet completed. + * + * Unless this is RQ_POSTPONED, which will cause _req_is_done() to + * queue it on the retry workqueue instead of destroying it. + */ + if (req->master_bio && !(s & RQ_POSTPONED)) + return; + + /* Local still pending, even though master_bio is already completed? + * may happen for RQ_LOCAL_ABORTED requests. */ + if (s & RQ_LOCAL_PENDING) + return; + + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { + /* this is disconnected (local only) operation, + * or protocol A, B, or C P_BARRIER_ACK, + * or killed from the transfer log due to connection loss. */ + _req_is_done(mdev, req, rw); + } + /* else: network part and not DONE yet. that is + * protocol A, B, or C, barrier ack still pending... */ +} + /* Helper for __req_mod(). * Set m->bio to the master bio, if it is fit to be completed, * or leave it alone (it is initialized to NULL in __req_mod), * if it has already been completed, or cannot be completed yet. * If m->bio is set, the error status to be returned is placed in m->error. 
*/ -void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) +static +void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; @@ -309,26 +340,15 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) D_ASSERT(s & RQ_NET_DONE); } } - - if (s & RQ_LOCAL_PENDING) - return; - - if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { - /* this is disconnected (local only) operation, - * or protocol A, B, or C P_BARRIER_ACK, - * or killed from the transfer log due to connection loss. */ - _req_is_done(mdev, req, rw); - } - /* else: network part and not DONE yet. that is - * protocol A, B, or C, barrier ack still pending... */ + req_may_be_done(req); } -static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m) +static void req_may_be_completed_not_susp(struct drbd_request *req, struct bio_and_error *m) { struct drbd_conf *mdev = req->w.mdev; if (!drbd_suspended(mdev)) - _req_may_be_done(req, m); + req_may_be_completed(req, m); } /* obviously this could be coded as many single functions @@ -395,14 +415,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_LOCAL_PENDING; maybe_wakeup_conflicting_requests(req); - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); put_ldev(mdev); break; case ABORT_DISK_IO: req->rq_state |= RQ_LOCAL_ABORTED; if (req->rq_state & RQ_WRITE) - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); else goto goto_queue_for_net_read; break; @@ -413,7 +433,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); maybe_wakeup_conflicting_requests(req); - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); put_ldev(mdev); break; @@ -421,7 +441,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* it is legal to fail 
READA */ req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); put_ldev(mdev); break; @@ -441,7 +461,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; } @@ -458,8 +478,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* from __drbd_make_request * or from bio_endio during read io-error recovery */ - /* so we can verify the handle in the answer packet - * corresponding hlist_del is in _req_may_be_done() */ + /* So we can verify the handle in the answer packet. + * Corresponding drbd_remove_request_interval is in + * req_may_be_completed() */ D_ASSERT(drbd_interval_empty(&req->i)); drbd_insert_interval(&mdev->read_requests, &req->i); @@ -477,7 +498,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from __drbd_make_request only */ - /* corresponding hlist_del is in _req_may_be_done() */ + /* Corresponding drbd_remove_request_interval is in + * req_may_be_completed() */ D_ASSERT(drbd_interval_empty(&req->i)); drbd_insert_interval(&mdev->write_requests, &req->i); @@ -539,7 +561,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_NET_QUEUED; /* if we did it right, tl_clear should be scheduled only after * this, so this should not be necessary! 
*/ - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; case HANDED_OVER_TO_NETWORK: @@ -562,7 +584,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } req->rq_state &= ~RQ_NET_QUEUED; req->rq_state |= RQ_NET_SENT; - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; case OOS_HANDED_TO_NETWORK: @@ -570,7 +592,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * as far as this connection is concerned. */ req->rq_state &= ~RQ_NET_QUEUED; req->rq_state |= RQ_NET_DONE; - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; case CONNECTION_LOST_WHILE_PENDING: @@ -591,7 +613,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (!(req->rq_state & RQ_NET_QUEUED)) { if (p) goto goto_read_retry_local; - _req_may_be_done(req, m); /* Allowed while state.susp */ + req_may_be_completed(req, m); /* Allowed while state.susp */ } break; @@ -624,7 +646,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); req->rq_state &= ~RQ_NET_PENDING; maybe_wakeup_conflicting_requests(req); - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; case POSTPONE_WRITE: @@ -636,7 +658,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_POSTPONED; maybe_wakeup_conflicting_requests(req); - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; case NEG_ACKED: @@ -654,13 +676,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, goto goto_read_retry_local; maybe_wakeup_conflicting_requests(req); - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); /* else: done by HANDED_OVER_TO_NETWORK */ break; goto_read_retry_local: if (!drbd_may_do_local_read(mdev, req->i.sector, req->i.size)) { - 
_req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; } D_ASSERT(!(req->rq_state & RQ_LOCAL_PENDING)); @@ -675,7 +697,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; - _req_may_be_done(req, m); /* Allowed while state.susp */ + req_may_be_completed(req, m); /* Allowed while state.susp */ break; case RESTART_FROZEN_DISK_IO: @@ -696,8 +718,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case RESEND: /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK before the connection loss (B&C only); only P_BARRIER_ACK was missing. - Trowing them out of the TL here by pretending we got a BARRIER_ACK - We ensure that the peer was not rebooted */ + Throwing them out of the TL here by pretending we got a BARRIER_ACK. + During connection handshake, we ensure that the peer was not rebooted. */ if (!(req->rq_state & RQ_NET_OK)) { if (req->w.cb) { drbd_queue_work(&mdev->tconn->data.work, &req->w); @@ -723,7 +745,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) atomic_sub(req->i.size>>9, &mdev->ap_in_flight); } - _req_may_be_done(req, m); /* Allowed while state.susp */ + req_may_be_done(req); /* Allowed while state.susp */ break; case DATA_RECEIVED: @@ -731,7 +753,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); - _req_may_be_done_not_susp(req, m); + req_may_be_completed_not_susp(req, m); break; }; From 2415308eb94e7bddf9c9a0f210374600210274d7 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 17:06:29 +0200 Subject: [PATCH 499/609] drbd: move put_ldev from __req_mod() to the endio callback Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 4 ---- drivers/block/drbd/drbd_worker.c | 1 + 
2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f6a785b239a..f708aa1809f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -416,7 +416,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, maybe_wakeup_conflicting_requests(req); req_may_be_completed_not_susp(req, m); - put_ldev(mdev); break; case ABORT_DISK_IO: @@ -434,7 +433,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); maybe_wakeup_conflicting_requests(req); req_may_be_completed_not_susp(req, m); - put_ldev(mdev); break; case READ_AHEAD_COMPLETED_WITH_ERROR: @@ -442,7 +440,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; req_may_be_completed_not_susp(req, m); - put_ldev(mdev); break; case READ_COMPLETED_WITH_ERROR: @@ -454,7 +451,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(!(req->rq_state & RQ_NET_MASK)); __drbd_chk_io_error(mdev, false); - put_ldev(mdev); goto_queue_for_net_read: diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3adbc5a2970..f10451701cd 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -238,6 +238,7 @@ void drbd_request_endio(struct bio *bio, int error) spin_lock_irqsave(&mdev->tconn->req_lock, flags); __req_mod(req, what, &m); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + put_ldev(mdev); if (m.bio) complete_master_bio(mdev, &m); From 4439c400ab278378a82efb543bb3bb91b184d8db Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 17:29:30 +0200 Subject: [PATCH 500/609] drbd: simplify retry path of failed READ requests If a local or remote READ request fails, just push it back to the retry workqueue. 
It will re-enter __drbd_make_request, and be re-assigned to a suitable local or remote path, or failed, if we do not have access to good data anymore. This obsoletes w_read_retry_remote(), and eliminates two goto...retry blocks in __req_mod() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_req.c | 65 ++++++++++---------------------- drivers/block/drbd/drbd_worker.c | 20 ---------- 3 files changed, 19 insertions(+), 67 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index caeb01eef44..ab9926e915c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1439,7 +1439,6 @@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio * extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ -extern int w_read_retry_remote(struct drbd_work *, int); extern int w_e_end_data_req(struct drbd_work *, int); extern int w_e_end_rsdata_req(struct drbd_work *, int); extern int w_e_end_csum_rs_req(struct drbd_work *, int); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f708aa1809f..1249672519c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -263,7 +263,6 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; - int rw = req->rq_state & RQ_WRITE ? 
WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -282,6 +281,8 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) return; if (req->master_bio) { + int rw = bio_rw(req->master_bio); + /* this is DATA_RECEIVED (remote read) * or protocol C P_WRITE_ACK * or protocol B P_RECV_ACK @@ -326,7 +327,18 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) /* Update disk stats */ _drbd_end_io_acct(mdev, req); - if (!(s & RQ_POSTPONED)) { + /* if READ failed, + * have it be pushed back to the retry work queue, + * so it will re-enter __drbd_make_request, + * and be re-assigned to a suitable local or remote path, + * or failed if we do not have access to good data anymore. + * READA may fail. + * WRITE should have used all available paths already. + */ + if (!ok && rw == READ) + req->rq_state |= RQ_POSTPONED; + + if (!(req->rq_state & RQ_POSTPONED)) { m->error = ok ? 0 : (error ?: -EIO); m->bio = req->master_bio; req->master_bio = NULL; @@ -420,10 +432,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case ABORT_DISK_IO: req->rq_state |= RQ_LOCAL_ABORTED; - if (req->rq_state & RQ_WRITE) - req_may_be_completed_not_susp(req, m); - else - goto goto_queue_for_net_read; + req_may_be_completed_not_susp(req, m); break; case WRITE_COMPLETED_WITH_ERROR: @@ -451,20 +460,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(!(req->rq_state & RQ_NET_MASK)); __drbd_chk_io_error(mdev, false); - - goto_queue_for_net_read: - - /* no point in retrying if there is no good remote data, - * or we have no connection. */ - if (mdev->state.pdsk != D_UP_TO_DATE) { - req_may_be_completed_not_susp(req, m); - break; - } - - /* _req_mod(req,TO_BE_SENT); oops, recursion... 
*/ - req->rq_state |= RQ_NET_PENDING; - inc_ap_pending(mdev); - /* fall through: _req_mod(req,QUEUE_FOR_NET_READ); */ + break; case QUEUE_FOR_NET_READ: /* READ or READA, and @@ -483,10 +479,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, set_bit(UNPLUG_REMOTE, &mdev->flags); D_ASSERT(req->rq_state & RQ_NET_PENDING); + D_ASSERT((req->rq_state & RQ_LOCAL_MASK) == 0); req->rq_state |= RQ_NET_QUEUED; - req->w.cb = (req->rq_state & RQ_LOCAL_MASK) - ? w_read_retry_remote - : w_send_read_req; + req->w.cb = w_send_read_req; drbd_queue_work(&mdev->tconn->data.work, &req->w); break; @@ -604,13 +599,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); - /* if it is still queued, we may not complete it here. - * it will be canceled soon. */ - if (!(req->rq_state & RQ_NET_QUEUED)) { - if (p) - goto goto_read_retry_local; - req_may_be_completed(req, m); /* Allowed while state.susp */ - } + req_may_be_completed(req, m); /* Allowed while state.susp */ break; case DISCARD_WRITE: @@ -668,27 +657,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_NET_DONE; - if (!(req->rq_state & RQ_WRITE)) - goto goto_read_retry_local; - maybe_wakeup_conflicting_requests(req); req_may_be_completed_not_susp(req, m); /* else: done by HANDED_OVER_TO_NETWORK */ break; - goto_read_retry_local: - if (!drbd_may_do_local_read(mdev, req->i.sector, req->i.size)) { - req_may_be_completed_not_susp(req, m); - break; - } - D_ASSERT(!(req->rq_state & RQ_LOCAL_PENDING)); - req->rq_state |= RQ_LOCAL_PENDING; - - get_ldev(mdev); - req->w.cb = w_restart_disk_io; - drbd_queue_work(&mdev->tconn->data.work, &req->w); - break; - case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f10451701cd..d7573f4b742 100644 --- 
a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -244,26 +244,6 @@ void drbd_request_endio(struct bio *bio, int error) complete_master_bio(mdev, &m); } -int w_read_retry_remote(struct drbd_work *w, int cancel) -{ - struct drbd_request *req = container_of(w, struct drbd_request, w); - struct drbd_conf *mdev = w->mdev; - - /* We should not detach for read io-error, - * but try to WRITE the P_DATA_REPLY to the failed location, - * to give the disk the chance to relocate that block */ - - spin_lock_irq(&mdev->tconn->req_lock); - if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { - _req_mod(req, READ_RETRY_REMOTE_CANCELED); - spin_unlock_irq(&mdev->tconn->req_lock); - return 0; - } - spin_unlock_irq(&mdev->tconn->req_lock); - - return w_send_read_req(w, 0); -} - void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest) { From 648e46b531006b069c66f171151819d10b423c26 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 20:12:24 +0200 Subject: [PATCH 501/609] drbd: complete_conflicting_writes() should not care about connections complete_conflicting_writes() should not cause -EIO. It should not time out either, or care for connection states. Connection timeout is detected elsewhere, and its cleanup path is supposed to remove any pending requests or peer_requests from the write_requests tree. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 46 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 1249672519c..c76402c3f64 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -800,21 +800,33 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector * The write_requests tree contains all active write requests which we * currently know about.
Wait for any requests to complete which conflict with * the new one. + * + * Only way out: remove the conflicting intervals from the tree. */ -static int complete_conflicting_writes(struct drbd_conf *mdev, - sector_t sector, int size) +static void complete_conflicting_writes(struct drbd_request *req) { - for(;;) { - struct drbd_interval *i; - int err; + DEFINE_WAIT(wait); + struct drbd_conf *mdev = req->w.mdev; + struct drbd_interval *i; + sector_t sector = req->i.sector; + int size = req->i.size; + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (!i) + return; + + for (;;) { + prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); i = drbd_find_overlap(&mdev->write_requests, sector, size); if (!i) - return 0; - err = drbd_wait_misc(mdev, i); - if (err) - return err; + break; + /* Indicate to wake up device->misc_wait on progress. */ + i->waiting = true; + spin_unlock_irq(&mdev->tconn->req_lock); + schedule(); + spin_lock_irq(&mdev->tconn->req_lock); } + finish_wait(&mdev->misc_wait, &wait); } int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) @@ -826,7 +838,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s struct drbd_request *req; struct net_conf *nc; int local, remote, send_oos = 0; - int err; + int err = 0; int ret = 0; union drbd_dev_state s; @@ -925,16 +937,10 @@ allocate_barrier: spin_lock_irq(&mdev->tconn->req_lock); if (rw == WRITE) { - err = complete_conflicting_writes(mdev, sector, size); - if (err) { - if (err != -ERESTARTSYS) - _conn_request_state(mdev->tconn, - NS(conn, C_TIMEOUT), - CS_HARD); - spin_unlock_irq(&mdev->tconn->req_lock); - err = -EIO; - goto fail_free_complete; - } + /* This may temporarily give up the req_lock, + * but will re-aquire it before it returns here. 
+ * Needs to be before the check on drbd_suspended() */ + complete_conflicting_writes(req); } if (drbd_suspended(mdev)) { From 9ed57dcbda37a1a1fb25ccda4206cc417e54e813 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 20:55:17 +0200 Subject: [PATCH 502/609] drbd: ignore volume number for drbd barrier packet exchange Transfer log epochs, and therefore P_BARRIER packets, are per resource, not per volume. We must not associate them with "some random volume". Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +-- drivers/block/drbd/drbd_main.c | 10 +++--- drivers/block/drbd/drbd_receiver.c | 53 ++++++++++++++---------------- 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ab9926e915c..85769085485 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -584,7 +584,7 @@ struct drbd_tl_epoch { }; struct drbd_epoch { - struct drbd_conf *mdev; + struct drbd_tconn *tconn; struct list_head list; unsigned int barrier_nr; atomic_t epoch_size; /* increased on every request added. 
*/ @@ -1060,7 +1060,7 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); extern int drbd_send_current_state(struct drbd_conf *mdev); extern int drbd_send_sync_param(struct drbd_conf *mdev); -extern void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, +extern void drbd_send_b_ack(struct drbd_tconn *tconn, u32 barrier_nr, u32 set_size); extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, struct drbd_peer_request *); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5529d392e5d..f8438d426d0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1463,21 +1463,21 @@ int drbd_send_bitmap(struct drbd_conf *mdev) return err; } -void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) +void drbd_send_b_ack(struct drbd_tconn *tconn, u32 barrier_nr, u32 set_size) { struct drbd_socket *sock; struct p_barrier_ack *p; - if (mdev->state.conn < C_CONNECTED) + if (tconn->cstate < C_WF_REPORT_PARAMS) return; - sock = &mdev->tconn->meta; - p = drbd_prepare_command(mdev, sock); + sock = &tconn->meta; + p = conn_prepare_command(tconn, sock); if (!p) return; p->barrier = barrier_nr; p->set_size = cpu_to_be32(set_size); - drbd_send_command(mdev, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); + conn_send_command(tconn, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); } /** diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b159ad15abe..786a7551079 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1169,11 +1169,15 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn, (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { if (!(ev & EV_CLEANUP)) { spin_unlock(&tconn->epoch_lock); - drbd_send_b_ack(epoch->mdev, epoch->barrier_nr, epoch_size); + drbd_send_b_ack(epoch->tconn, 
epoch->barrier_nr, epoch_size); spin_lock(&tconn->epoch_lock); } +#if 0 + /* FIXME: dec unacked on connection, once we have + * something to count pending connection packets in. */ if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) - dec_unacked(epoch->mdev); + dec_unacked(epoch->tconn); +#endif if (tconn->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); @@ -1369,19 +1373,15 @@ void conn_wait_active_ee_empty(struct drbd_tconn *tconn) static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) { - struct drbd_conf *mdev; int rv; struct p_barrier *p = pi->data; struct drbd_epoch *epoch; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) - return -EIO; - - inc_unacked(mdev); - + /* FIXME these are unacked on connection, + * not a specific (peer)device. + */ tconn->current_epoch->barrier_nr = p->barrier; - tconn->current_epoch->mdev = mdev; + tconn->current_epoch->tconn = tconn; rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR); /* P_BARRIER_ACK may imply that the corresponding extent is dropped from @@ -1400,7 +1400,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) if (epoch) break; else - dev_warn(DEV, "Allocation of an epoch failed, slowing down\n"); + conn_warn(tconn, "Allocation of an epoch failed, slowing down\n"); /* Fall through */ case WO_bdev_flush: @@ -1414,15 +1414,9 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) break; } - epoch = tconn->current_epoch; - wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0); - - D_ASSERT(atomic_read(&epoch->active) == 0); - D_ASSERT(epoch->flags == 0); - return 0; default: - dev_err(DEV, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering); + conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering); return -EIO; } @@ -5049,21 +5043,22 @@ static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) static 
int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) { - struct drbd_conf *mdev; struct p_barrier_ack *p = pi->data; + struct drbd_conf *mdev; + int vnr; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) - return -EIO; + tl_release(tconn, p->barrier, be32_to_cpu(p->set_size)); - tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size)); - - if (mdev->state.conn == C_AHEAD && - atomic_read(&mdev->ap_in_flight) == 0 && - !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { - mdev->start_resync_timer.expires = jiffies + HZ; - add_timer(&mdev->start_resync_timer); + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_AHEAD && + atomic_read(&mdev->ap_in_flight) == 0 && + !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { + mdev->start_resync_timer.expires = jiffies + HZ; + add_timer(&mdev->start_resync_timer); + } } + rcu_read_unlock(); return 0; } From 27eb13e99b515c52ba5a151a1acce6afb8a9b2b6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 30 Mar 2012 14:12:15 +0200 Subject: [PATCH 503/609] drbd: Fixed processing of disk-barrier, disk-flushes and disk-drain Since drbd_bump_write_ordering() is called in the attaching process while the disk state is D_ATTACHING, it was not considering these three flags during attach. A call to this function was missing from drbd_adm_disk_opts(). Fixed both issues.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 3 +++ drivers/block/drbd/drbd_receiver.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c5aa08474a3..ce9f4ca55ce 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1229,6 +1229,9 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } mutex_unlock(&mdev->tconn->conf_update); + + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); + drbd_md_sync(mdev); if (mdev->state.conn >= C_CONNECTED) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 786a7551079..e8cd4c4acc6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1229,7 +1229,7 @@ void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo wo = min(pwo, wo); rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (!get_ldev(mdev)) + if (!get_ldev_if_state(mdev, D_ATTACHING)) continue; dc = rcu_dereference(mdev->ldev->disk_conf); From a1096a6e9da7885ec9e11d93aa459685a33fa4d9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 6 Apr 2012 12:07:34 +0200 Subject: [PATCH 504/609] drbd: Delay/reject other state changes while establishing a connection Changes to the role and disk state should be delayed or rejected while we establish a connection. This is necessary, since the peer will base its resync decision on the UUIDs and the state we sent in the drbd_connect() function. The most prominent example for this race is becoming primary after sending state and UUIDs and before the state changes to C_WF_CONNECTION. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 8 +++++++- drivers/block/drbd/drbd_state.c | 29 +++++++++++++++++++++-------- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 85769085485..4fad3f565ab 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -808,6 +808,7 @@ enum { CONN_WD_ST_CHG_FAIL, CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ + STATE_SENT, /* Do not change state/UUIDs while this is set */ }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ce9f4ca55ce..3a8fa89f673 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -502,7 +502,7 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) here, because we might were able to re-establish the connection in the meantime. 
*/ spin_lock_irq(&tconn->req_lock); - if (tconn->cstate < C_WF_REPORT_PARAMS) + if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) _conn_request_state(tconn, mask, val, CS_VERBOSE); spin_unlock_irq(&tconn->req_lock); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e8cd4c4acc6..7b690342efa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -848,6 +848,7 @@ static int conn_connect(struct drbd_tconn *tconn) struct net_conf *nc; int vnr, timeout, try, h, ok; bool discard_my_data; + enum drbd_state_rv rv; if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; @@ -1008,6 +1009,8 @@ retry: if (drbd_send_protocol(tconn) == -EOPNOTSUPP) return -1; + set_bit(STATE_SENT, &tconn->flags); + rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { kref_get(&mdev->kref); @@ -1024,8 +1027,11 @@ retry: } rcu_read_unlock(); - if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) + rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); + if (rv < SS_SUCCESS) { + clear_bit(STATE_SENT, &tconn->flags); return 0; + } drbd_thread_start(&tconn->asender); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8c5cedfb4d9..c4554b17e48 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -53,7 +53,7 @@ static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_tconn *); static enum drbd_state_rv is_valid_transition(union drbd_state os, 
union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, enum sanitize_state_warnings *warn); @@ -267,7 +267,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (rv == SS_UNKNOWN_ERROR) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) { - rv = is_valid_soft_transition(os, ns); + rv = is_valid_soft_transition(os, ns, mdev->tconn); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ } @@ -313,7 +313,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) - rv = is_valid_soft_transition(os, ns); + rv = is_valid_soft_transition(os, ns, mdev->tconn); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (rv < SS_SUCCESS) { @@ -569,7 +569,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) * @os: old state. */ static enum drbd_state_rv -is_valid_soft_transition(union drbd_state os, union drbd_state ns) +is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_tconn *tconn) { enum drbd_state_rv rv = SS_SUCCESS; @@ -595,6 +595,13 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) rv = SS_IN_TRANSIENT_STATE; */ + /* While establishing a connection only allow cstate to change. + Delay/refuse role changes, detach attach etc... 
*/ + if (test_bit(STATE_SENT, &tconn->flags) && + !(os.conn == C_WF_REPORT_PARAMS || + (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) + rv = SS_IN_TRANSIENT_STATE; + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) rv = SS_NEED_CONNECTION; @@ -927,9 +934,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, this happen...*/ if (is_valid_state(mdev, os) == rv) - rv = is_valid_soft_transition(os, ns); + rv = is_valid_soft_transition(os, ns, mdev->tconn); } else - rv = is_valid_soft_transition(os, ns); + rv = is_valid_soft_transition(os, ns, mdev->tconn); } if (rv < SS_SUCCESS) { @@ -1393,6 +1400,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) drbd_send_state(mdev, ns); + /* Wake up role changes, that were delayed because of connection establishing */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { + if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) + wake_up(&mdev->state_wait); + } + /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk * failure, or because of connection loss. @@ -1565,9 +1578,9 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union rv = is_valid_state(mdev, ns); if (rv < SS_SUCCESS) { if (is_valid_state(mdev, os) == rv) - rv = is_valid_soft_transition(os, ns); + rv = is_valid_soft_transition(os, ns, tconn); } else - rv = is_valid_soft_transition(os, ns); + rv = is_valid_soft_transition(os, ns, tconn); } if (rv < SS_SUCCESS) break; From e4bad1bcacd82b3d7727e30db26be3580bdc722a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 6 Apr 2012 12:08:51 +0200 Subject: [PATCH 505/609] drbd: Ensure that data_size is not 0 before using data_size-1 as index This could be exploited by a peer which runs modified code. 
Reported-by: Dan Carpenter Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7b690342efa..349bd51773e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3344,10 +3344,10 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) if (apv >= 88) { if (apv == 88) { - if (data_size > SHARED_SECRET_MAX) { - dev_err(DEV, "verify-alg too long, " - "peer wants %u, accepting only %u byte\n", - data_size, SHARED_SECRET_MAX); + if (data_size > SHARED_SECRET_MAX || data_size == 0) { + dev_err(DEV, "verify-alg of wrong size, " + "peer wants %u, accepting only up to %u byte\n", + data_size, SHARED_SECRET_MAX); err = -EIO; goto reconnect; } From 3ea35df83fe5e081c793a65361f912ec0835213b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 6 Apr 2012 12:13:18 +0200 Subject: [PATCH 506/609] drbd: fix spelling, remove boring development log message Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 349bd51773e..7c67be0b7a0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1891,7 +1891,7 @@ static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) } /* maybe change sync_ee into interval trees as well? 
*/ -static bool overlaping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) +static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) { struct drbd_peer_request *rs_req; bool rv = 0; @@ -1906,9 +1906,6 @@ static bool overlaping_resync_write(struct drbd_conf *mdev, struct drbd_peer_req } spin_unlock_irq(&mdev->tconn->req_lock); - if (rv) - dev_warn(DEV, "WARN: Avoiding concurrent data/resync write to single sector.\n"); - return rv; } @@ -2194,7 +2191,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) spin_unlock_irq(&mdev->tconn->req_lock); if (mdev->state.conn == C_SYNC_TARGET) - wait_event(mdev->ee_wait, !overlaping_resync_write(mdev, peer_req)); + wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req)); if (mdev->tconn->agreed_pro_version < 100) { rcu_read_lock(); From 07be15b12c41dbc5ceae130be2e0b655f7611691 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 11:53:08 +0200 Subject: [PATCH 507/609] drbd: fix resend/resubmit of frozen IO DRBD can freeze IO, due to fencing policy (fencing resource-and-stonith), or because we lost access to data (on-no-data-accessible suspend-io). Resuming from there (re-connect, or re-attach, or explicit admin intervention) should "just work". Unfortunately, if the re-attach/re-connect did not happen within the timeout, since the commit drbd: Implemented real timeout checking for request processing time if so configured, the request_timer_fn() would time out and detach/disconnect virtually immediately. This change tracks the most recent attach and connect, and does not time out within the timeout period after the most recent attach/connect.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_req.c | 51 +++++++++++++++++++++++---------- drivers/block/drbd/drbd_state.c | 16 ++++++++++- 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4fad3f565ab..976e78cadd3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -859,6 +859,7 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned int epochs; enum write_ordering_e write_ordering; + unsigned long last_reconnect_jif; struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -881,6 +882,7 @@ struct drbd_conf { struct block_device *this_bdev; struct gendisk *vdisk; + unsigned long last_reattach_jif; struct drbd_work resync_work, unplug_work, go_diskless, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c76402c3f64..44a7d6ba4e4 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1171,12 +1171,14 @@ void request_timer_fn(unsigned long data) struct list_head *le; struct net_conf *nc; unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ + unsigned long now; rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - ent = nc ? 
nc->timeout * HZ/10 * nc->ko_count : 0; + if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS) + ent = nc->timeout * HZ/10 * nc->ko_count; - if (get_ldev(mdev)) { + if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */ dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; put_ldev(mdev); } @@ -1184,32 +1186,51 @@ void request_timer_fn(unsigned long data) et = min_not_zero(dt, ent); - if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) + if (!et) return; /* Recurring timer stopped */ + now = jiffies; + spin_lock_irq(&tconn->req_lock); le = &tconn->oldest_tle->requests; if (list_empty(le)) { spin_unlock_irq(&tconn->req_lock); - mod_timer(&mdev->request_timer, jiffies + et); + mod_timer(&mdev->request_timer, now + et); return; } le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (ent && req->rq_state & RQ_NET_PENDING) { - if (time_is_before_eq_jiffies(req->start_time + ent)) { - dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); - } + + /* The request is considered timed out, if + * - we have some effective timeout from the configuration, + * with above state restrictions applied, + * - the oldest request is waiting for a response from the network + * resp. the local disk, + * - the oldest request is in fact older than the effective timeout, + * - the connection was established (resp. disk was attached) + * for longer than the timeout already. + * Note that for 32bit jiffies and very stable connections/disks, + * we may have a wrap around, which is catched by + * !time_in_range(now, last_..._jif, last_..._jif + timeout). + * + * Side effect: once per 32bit wrap-around interval, which means every + * ~198 days with 250 HZ, we have a window where the timeout would need + * to expire twice (worst case) to become effective. Good enough. 
+ */ + if (ent && req->rq_state & RQ_NET_PENDING && + time_after(now, req->start_time + ent) && + !time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) { + dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); } - if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev) { - if (time_is_before_eq_jiffies(req->start_time + dt)) { - dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); - __drbd_chk_io_error(mdev, 1); - } + if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev && + time_after(now, req->start_time + dt) && + !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); } - nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et; + nt = (time_after(now, req->start_time + et) ? 
now : req->start_time) + et; spin_unlock_irq(&tconn->req_lock); mod_timer(&mdev->request_timer, nt); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c4554b17e48..2673049df34 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1075,6 +1075,13 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) drbd_resume_al(mdev); + /* remember last attach time so request_timer_fn() won't + * kill newly established sessions while we are still trying to thaw + * previously frozen IO */ + if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && + ns.disk > D_NEGOTIATING) + mdev->last_reattach_jif = jiffies; + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); if (ascw) { ascw->os = os; @@ -1609,8 +1616,15 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state enum drbd_state_rv rv; int vnr, number_of_volumes = 0; - if (mask.conn == C_MASK) + if (mask.conn == C_MASK) { + /* remember last connect time so request_timer_fn() won't + * kill newly established sessions while we are still trying to thaw + * previously frozen IO */ + if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS) + tconn->last_reconnect_jif = jiffies; + tconn->cstate = val.conn; + } rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { From 5016b82a49eb06cbe2002db7bd8a5501ba4ef6d1 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 12:00:56 +0200 Subject: [PATCH 508/609] drbd: fix race between drbdadm invalidate/verify and finishing resync When a resync or online verify is finished or aborted, drbd does a bulk write-out of changed bitmap pages. If *in that very moment* a new verify or resync is triggered, this can race: ASSERT( !test_bit(BITMAP_IO, &mdev->flags) ) in drbd_main.c FIXME going to queue 'set_n_write from StartingSync' but 'write from resync_finished' still pending? and similar. 
This can be observed with e.g. tight invalidate loops in test scripts, and probably has no real-life implication. Still, that race can be solved by first quiescing the device, before starting a new resync or verify. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3a8fa89f673..cbd45de533c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2372,6 +2372,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. */ + drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); @@ -2390,6 +2391,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } + drbd_resume_io(mdev); out: drbd_adm_finish(info, retcode); @@ -2435,6 +2437,11 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; + /* If there is still bitmap IO pending, probably because of a previous + * resync just being finished, wait for it before requesting a new resync.
*/ + drbd_suspend_io(mdev); + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); if (retcode < SS_SUCCESS) { if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) { @@ -2450,6 +2457,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) } else retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); } + drbd_resume_io(mdev); out: drbd_adm_finish(info, retcode); @@ -2903,8 +2911,10 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) } /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. */ + drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + drbd_resume_io(mdev); out: drbd_adm_finish(info, retcode); return 0; From a220d291804233e3a5e3425abf79fa1e62e7bd35 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 12:07:18 +0200 Subject: [PATCH 509/609] drbd: allow bitmap to change during writeout from resync_finished Symptom: messages similar to "FIXME asender in bm_change_bits_to, bitmap locked for 'write from resync_finished' by worker" If a resync or verify is finished (or aborted), a full bitmap writeout is triggered. If we have ongoing local IO, the bitmap may still change during that writeout, pending and not yet processed acks may cause bits to be cleared, while new writes may cause bits to be set. To fix this, introduce the drbd_bm_write_copy_pages() variant.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 16 ++++++++++++++++ drivers/block/drbd/drbd_int.h | 15 +++++++++++---- drivers/block/drbd/drbd_state.c | 4 ++-- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e343817bc69..ddd29770819 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1190,6 +1190,22 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); } +/** + * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. + * @mdev: DRBD device. + * + * Will only write pages that have changed since last IO. + * In contrast to drbd_bm_write(), this will copy the bitmap pages + * to temporary writeout pages. It is intended to trigger a full write-out + * while still allowing the bitmap to change, for example if a resync or online + * verify is aborted due to a failed peer disk, while local IO continues, or + * pending resync acks are still being processed. + */ +int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0); +} + /** * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. * @mdev: DRBD device. 
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 976e78cadd3..5b1789af6cd 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -709,22 +709,28 @@ enum bm_flag { BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ /* currently locked for bulk operation */ - BM_LOCKED_MASK = 0x7, + BM_LOCKED_MASK = 0xf, /* in detail, that is: */ BM_DONT_CLEAR = 0x1, BM_DONT_SET = 0x2, BM_DONT_TEST = 0x4, + /* so we can mark it locked for bulk operation, + * and still allow all non-bulk operations */ + BM_IS_LOCKED = 0x8, + /* (test bit, count bit) allowed (common case) */ - BM_LOCKED_TEST_ALLOWED = 0x3, + BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED, /* testing bits, as well as setting new bits allowed, but clearing bits * would be unexpected. Used during bitmap receive. Setting new bits * requires sending of "out-of-sync" information, though. */ - BM_LOCKED_SET_ALLOWED = 0x1, + BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED, - /* clear is not expected while bitmap is locked for bulk operation */ + /* for drbd_bm_write_copy_pages, everything is allowed, + * only concurrent bulk operations are locked out. 
*/ + BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED, }; struct drbd_work_queue { @@ -1306,6 +1312,7 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); +extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2673049df34..dd618b5346f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1421,8 +1421,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * No harm done if some bits change during this phase. */ if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, - "write from resync_finished", BM_LOCKED_SET_ALLOWED); + drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL, + "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); put_ldev(mdev); } From f66ee69746f6413cae41bdc8b26260e653f62402 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 13:04:03 +0200 Subject: [PATCH 510/609] drbd: bm_page_async_io: properly initialize page->private If bm_page_async_io is advised to use a new page for I/O (BM_AIO_COPY_PAGES is set), it will get it from a mempool. Once the mempool has to dip into its reserves the page is not reinitialized, i.e. page->private contains garbage, which will lead to various problems once the I/O completes (dereferences of NULL pointers, the submitting thread getting stuck in D-state, ...). 
Signed-off-by: Arne Redlich Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ddd29770819..65c55ecfeae 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) static void bm_store_page_idx(struct page *page, unsigned long idx) { BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); - page_private(page) |= idx; + set_page_private(page, idx); } static unsigned long bm_page_to_idx(struct page *page) From 1882e22df7850b7ff74395aa955066f5405020d6 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 13:09:00 +0200 Subject: [PATCH 511/609] drbd: grammar fix in log message Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7c67be0b7a0..1e9e5145e3c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2820,7 +2820,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; - dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); + dev_info(DEV, "Lost last syncUUID packet, corrected:\n"); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); return -1; From c5b005ab7091c9ef4ca9b47569a8e27e54588933 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Apr 2012 12:53:52 +0200 Subject: [PATCH 512/609] drbd: use bitmap_parse instead of __bitmap_parse The buffer 'sc.cpu_mask' is a kernel buffer. 
If bitmap_parse is used instead of __bitmap_parse the extra parameter that indicates a kernel buffer is not needed. Signed-off-by: H Hartley Sweeten Cc: Lars Ellenberg Cc: Philipp Reisner Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f8438d426d0..9e65bad3e51 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2637,10 +2637,10 @@ int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts) /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { /* FIXME: Get rid of constant 32 here */ - err = __bitmap_parse(res_opts->cpu_mask, 32, 0, - cpumask_bits(new_cpu_mask), nr_cpu_ids); + err = bitmap_parse(res_opts->cpu_mask, 32, + cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); + conn_warn(tconn, "bitmap_parse() failed with %d\n", err); /* retcode = ERR_CPU_MASK_PARSE; */ goto fail; } From 2820fd396940756904bd7e4136115f493c63d369 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 12 Jul 2012 10:22:48 +0200 Subject: [PATCH 513/609] drbd: Move the call to listen() out of drbd_accept() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1e9e5145e3c..8f25095f390 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -468,11 +468,6 @@ static int drbd_accept(const char **what, struct socket *sock, struct socket **n struct sock *sk = sock->sk; int err = 0; - *what = "listen"; - err = sock->ops->listen(sock, 5); - if (err < 0) - 
goto out; - *what = "sock_create_lite"; err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, newsock); @@ -742,6 +737,11 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) if (err < 0) goto out; + what = "listen"; + err = s_listen->ops->listen(s_listen, 5); + if (err < 0) + goto out; + err = drbd_accept(&what, s_listen, &s_estab); out: From 7e0f096b8d3ffcf04955c239e80993ec451c9a1a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 12 Jul 2012 10:25:35 +0200 Subject: [PATCH 514/609] drbd: Remove drbd_accept() and use kernel_accept() instead Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8f25095f390..96ab2ffc2a4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -461,33 +461,6 @@ static void drbd_wait_ee_list_empty(struct drbd_conf *mdev, spin_unlock_irq(&mdev->tconn->req_lock); } -/* see also kernel_accept; which is only present since 2.6.18. 
- * also we want to log which part of it failed, exactly */ -static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock) -{ - struct sock *sk = sock->sk; - int err = 0; - - *what = "sock_create_lite"; - err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, - newsock); - if (err < 0) - goto out; - - *what = "accept"; - err = sock->ops->accept(sock, *newsock, 0); - if (err < 0) { - sock_release(*newsock); - *newsock = NULL; - goto out; - } - (*newsock)->ops = sock->ops; - __module_get((*newsock)->ops->owner); - -out: - return err; -} - static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) { mm_segment_t oldfs; @@ -742,7 +715,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) if (err < 0) goto out; - err = drbd_accept(&what, s_listen, &s_estab); + what = "accept"; + err = kernel_accept(s_listen, &s_estab, 0); out: if (s_listen) From 26ec92871be1e6bd48d0be9ab38ee1ebbeea49f1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 Jul 2012 20:36:03 +0200 Subject: [PATCH 515/609] drbd: Stop using NLA_PUT*(). These macros no longer exist in kernel version v3.5-rc1. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 54 ++++++++++++++++++------------- include/linux/genl_magic_func.h | 8 +++-- include/linux/genl_magic_struct.h | 16 ++++----- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cbd45de533c..dc5bd6bbb28 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2554,13 +2554,17 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsi nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); if (!nla) goto nla_put_failure; - if (vnr != VOLUME_UNSPECIFIED) - NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name); - if (tconn->my_addr_len) - NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr); - if (tconn->peer_addr_len) - NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr); + if (vnr != VOLUME_UNSPECIFIED && + nla_put_u32(skb, T_ctx_volume, vnr)) + goto nla_put_failure; + if (nla_put_string(skb, T_ctx_resource_name, tconn->name)) + goto nla_put_failure; + if (tconn->my_addr_len && + nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr)) + goto nla_put_failure; + if (tconn->peer_addr_len && + nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr)) + goto nla_put_failure; nla_nest_end(skb, nla); return 0; @@ -2618,20 +2622,23 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; - NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY); - NLA_PUT_U32(skb, T_current_state, mdev->state.i); - NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid); - NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)); + if (nla_put_u32(skb, T_sib_reason, sib ? 
sib->sib_reason : SIB_GET_STATUS_REPLY) || + nla_put_u32(skb, T_current_state, mdev->state.i) || + nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) || + nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev))) + goto nla_put_failure; if (got_ldev) { - NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags); - NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); - NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev)); - NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev)); + if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) || + nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid) || + nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) || + nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev))) + goto nla_put_failure; if (C_SYNC_SOURCE <= mdev->state.conn && C_PAUSED_SYNC_T >= mdev->state.conn) { - NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total); - NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed); + if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) || + nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed)) + goto nla_put_failure; } } @@ -2641,15 +2648,18 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, case SIB_GET_STATUS_REPLY: break; case SIB_STATE_CHANGE: - NLA_PUT_U32(skb, T_prev_state, sib->os.i); - NLA_PUT_U32(skb, T_new_state, sib->ns.i); + if (nla_put_u32(skb, T_prev_state, sib->os.i) || + nla_put_u32(skb, T_new_state, sib->ns.i)) + goto nla_put_failure; break; case SIB_HELPER_POST: - NLA_PUT_U32(skb, - T_helper_exit_code, sib->helper_exit_code); + if (nla_put_u32(skb, T_helper_exit_code, + sib->helper_exit_code)) + goto nla_put_failure; /* fall through */ case SIB_HELPER_PRE: - NLA_PUT_STRING(skb, T_helper, sib->helper_name); + if (nla_put_string(skb, T_helper, sib->helper_name)) + goto nla_put_failure; break; } } diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 0b8a88e2e83..023bc346b87 100644 --- a/include/linux/genl_magic_func.h +++ 
b/include/linux/genl_magic_func.h @@ -367,7 +367,8 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ __is_signed) \ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ - __put(skb, attr_nr, s->name); \ + if (__put(skb, attr_nr, s->name)) \ + goto nla_put_failure; \ } #undef __array @@ -375,9 +376,10 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ __get, __put, __is_signed) \ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ - __put(skb, attr_nr, min_t(int, maxlen, \ + if (__put(skb, attr_nr, min_t(int, maxlen, \ s->name ## _len + (nla_type == NLA_NUL_STRING)),\ - s->name); \ + s->name)) \ + goto nla_put_failure; \ } #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 1d0bd79e27b..eecd19b3700 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -65,28 +65,28 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, char, \ - nla_get_u8, NLA_PUT_U8, false) + nla_get_u8, nla_put_u8, false) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ - nla_get_u8, NLA_PUT_U8, false) + nla_get_u8, nla_put_u8, false) #define __u16_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ - nla_get_u16, NLA_PUT_U16, false) + nla_get_u16, nla_put_u16, false) #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ - nla_get_u32, NLA_PUT_U32, false) + nla_get_u32, nla_put_u32, false) #define __s32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ - nla_get_u32, NLA_PUT_U32, true) + nla_get_u32, nla_put_u32, true) #define 
__u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ - nla_get_u64, NLA_PUT_U64, false) + nla_get_u64, nla_put_u64, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, NLA_PUT, false) + nla_strlcpy, nla_put, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ - nla_memcpy, NLA_PUT, false) + nla_memcpy, nla_put, false) /* fields with default values */ #define __flg_field_def(attr_nr, attr_flag, name, default) \ From 9a51ab1c1b3c1e21f076cdd571bbe6ca7d1b504c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 20 Feb 2012 21:53:28 +0100 Subject: [PATCH 516/609] drbd: New disk option al-updates By disabling al-updates one might increase performance. The price for that is that in case a crashed primary (that had al-updates disabled) is reintegrated, it will receive a full-resync instead of a bitmap based resync. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 12 ++++++++++-- drivers/block/drbd/drbd_nl.c | 17 +++++++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 3 +++ include/linux/drbd_limits.h | 1 + 5 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9eae2894431..83d48d210b6 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -276,8 +276,16 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock.
*/ if (mdev->act_log->pending_changes) { - al_write_transaction(mdev); - mdev->al_writ_cnt++; + bool write_al_updates; + + rcu_read_lock(); + write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + rcu_read_unlock(); + + if (write_al_updates) { + al_write_transaction(mdev); + mdev->al_writ_cnt++; + } spin_lock_irq(&mdev->al_lock); /* FIXME diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index dc5bd6bbb28..c5d4fac1a11 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); + if (new_disk_conf->al_updates) + mdev->ldev->md.flags &= ~MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); drbd_md_sync(mdev); @@ -1545,7 +1550,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } else if (dd == grew) set_bit(RESYNC_AFTER_NEG, &mdev->flags); - if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) || + (test_bit(CRASHED_PRIMARY, &mdev->flags) && + drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) { dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, @@ -1588,13 +1595,19 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (ns.disk == D_CONSISTENT && (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; - rcu_read_unlock(); /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before this point, because drbd_request_state() modifies these flags.
*/ + if (rcu_dereference(mdev->ldev->disk_conf)->al_updates) + mdev->ldev->md.flags &= ~MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + + rcu_read_unlock(); + /* In case we are C_CONNECTED postpone any decision on the new disk state after the negotiation phase. */ if (mdev->state.conn == C_CONNECTED) { diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e86156c10f..36ae7dd28d9 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -338,6 +338,7 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_PEER_OUT_DATED (1 << 5) #define MDF_CRASHED_PRIMARY (1 << 6) #define MDF_AL_CLEAN (1 << 7) +#define MDF_AL_DISABLED (1 << 8) enum drbd_uuid_index { UI_CURRENT, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 826008f297f..92ec4b50a88 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -130,6 +130,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) + /* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */ + __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -168,6 +170,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 17ef66a5c11..1fa19c5f5e6 100644 --- a/include/linux/drbd_limits.h +++
b/include/linux/drbd_limits.h @@ -210,6 +210,7 @@ #define DRBD_DISK_DRAIN_DEF 1 #define DRBD_MD_FLUSHES_DEF 1 #define DRBD_TCP_CORK_DEF 1 +#define DRBD_AL_UPDATES_DEF 1 #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 From 1f3e509b761d6d8f91acbf7da39624d086e1f2eb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 12 Jul 2012 11:08:34 +0200 Subject: [PATCH 517/609] drbd: pull prepare_listen_socket() out of drbd_wait_for_connect() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 60 +++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 96ab2ffc2a4..46c55793dd8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -666,12 +666,11 @@ out: return sock; } -static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) +static struct socket *prepare_listen_socket(struct drbd_tconn *tconn) { - int timeo, err, my_addr_len; - int sndbuf_size, rcvbuf_size, connect_int; - struct socket *s_estab = NULL, *s_listen; + int err, sndbuf_size, rcvbuf_size, my_addr_len; struct sockaddr_in6 my_addr; + struct socket *s_listen; struct net_conf *nc; const char *what; @@ -683,7 +682,6 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) } sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; - connect_int = nc->connect_int; rcu_read_unlock(); my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6)); @@ -691,18 +689,13 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, - SOCK_STREAM, IPPROTO_TCP, &s_listen); + SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; goto out; } - timeo = connect_int * HZ; - timeo += (random32() & 1) ? 
timeo / 7 : -timeo / 7; /* 28.5% random jitter */ - - s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ - s_listen->sk->sk_rcvtimeo = timeo; - s_listen->sk->sk_sndtimeo = timeo; + s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); what = "bind before listen"; @@ -715,7 +708,46 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) if (err < 0) goto out; - what = "accept"; + return s_listen; +out: + if (s_listen) + sock_release(s_listen); + if (err < 0) { + if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { + conn_err(tconn, "%s failed, err = %d\n", what, err); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + } + } + + return NULL; +} + +static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) +{ + int timeo, connect_int, err = 0; + struct socket *s_estab = NULL; + struct socket *s_listen; + struct net_conf *nc; + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + if (!nc) { + rcu_read_unlock(); + return NULL; + } + connect_int = nc->connect_int; + rcu_read_unlock(); + + timeo = connect_int * HZ; + timeo += (random32() & 1) ? 
timeo / 7 : -timeo / 7; /* 28.5% random jitter */ + + s_listen = prepare_listen_socket(tconn); + if (!s_listen) + goto out; + + s_listen->sk->sk_rcvtimeo = timeo; + s_listen->sk->sk_sndtimeo = timeo; + err = kernel_accept(s_listen, &s_estab, 0); out: @@ -723,7 +755,7 @@ out: sock_release(s_listen); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { - conn_err(tconn, "%s failed, err = %d\n", what, err); + conn_err(tconn, "accept failed, err = %d\n", err); conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } From 7a426fd8d5af1d5e71cfcdf5ecbefbbad47a81fd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 12 Jul 2012 14:22:37 +0200 Subject: [PATCH 518/609] drbd: Keep the listening socket open while trying to connect to the peer Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 72 ++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 46c55793dd8..9aac1c4033c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -666,7 +666,32 @@ out: return sock; } -static struct socket *prepare_listen_socket(struct drbd_tconn *tconn) +struct accept_wait_data { + struct drbd_tconn *tconn; + struct socket *s_listen; + struct completion door_bell; + void (*original_sk_state_change)(struct sock *sk); + +}; + +static void incomming_connection(struct sock *sk) +{ + struct accept_wait_data *ad = sk->sk_user_data; + struct drbd_tconn *tconn = ad->tconn; + + if (sk->sk_state != TCP_ESTABLISHED) + conn_warn(tconn, "unexpected tcp state change. 
sk_state = %d\n", sk->sk_state); + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_state_change = ad->original_sk_state_change; + sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_state_change(sk); + complete(&ad->door_bell); +} + +static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad) { int err, sndbuf_size, rcvbuf_size, my_addr_len; struct sockaddr_in6 my_addr; @@ -678,7 +703,7 @@ static struct socket *prepare_listen_socket(struct drbd_tconn *tconn) nc = rcu_dereference(tconn->net_conf); if (!nc) { rcu_read_unlock(); - return NULL; + return -EIO; } sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; @@ -703,12 +728,19 @@ static struct socket *prepare_listen_socket(struct drbd_tconn *tconn) if (err < 0) goto out; + ad->s_listen = s_listen; + write_lock_bh(&s_listen->sk->sk_callback_lock); + ad->original_sk_state_change = s_listen->sk->sk_state_change; + s_listen->sk->sk_state_change = incomming_connection; + s_listen->sk->sk_user_data = ad; + write_unlock_bh(&s_listen->sk->sk_callback_lock); + what = "listen"; err = s_listen->ops->listen(s_listen, 5); if (err < 0) goto out; - return s_listen; + return 0; out: if (s_listen) sock_release(s_listen); @@ -719,14 +751,13 @@ out: } } - return NULL; + return -EIO; } -static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) +static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad) { int timeo, connect_int, err = 0; struct socket *s_estab = NULL; - struct socket *s_listen; struct net_conf *nc; rcu_read_lock(); @@ -741,18 +772,11 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) timeo = connect_int * HZ; timeo += (random32() & 1) ? 
timeo / 7 : -timeo / 7; /* 28.5% random jitter */ - s_listen = prepare_listen_socket(tconn); - if (!s_listen) - goto out; + err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); + if (err <= 0) + return NULL; - s_listen->sk->sk_rcvtimeo = timeo; - s_listen->sk->sk_sndtimeo = timeo; - - err = kernel_accept(s_listen, &s_estab, 0); - -out: - if (s_listen) - sock_release(s_listen); + err = kernel_accept(ad->s_listen, &s_estab, 0); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { conn_err(tconn, "accept failed, err = %d\n", err); @@ -855,6 +879,10 @@ static int conn_connect(struct drbd_tconn *tconn) int vnr, timeout, try, h, ok; bool discard_my_data; enum drbd_state_rv rv; + struct accept_wait_data ad = { + .tconn = tconn, + .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), + }; if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; @@ -873,6 +901,9 @@ static int conn_connect(struct drbd_tconn *tconn) /* Assume that the peer only understands protocol 80 until we know better. 
*/ tconn->agreed_pro_version = 80; + if (prepare_listen_socket(tconn, &ad)) + return 0; + do { struct socket *s; @@ -911,7 +942,7 @@ static int conn_connect(struct drbd_tconn *tconn) } retry: - s = drbd_wait_for_connect(tconn); + s = drbd_wait_for_connect(tconn, &ad); if (s) { try = receive_first_packet(tconn, s); drbd_socket_okay(&sock.socket); @@ -957,6 +988,9 @@ retry: } } while (1); + if (ad.s_listen) + sock_release(ad.s_listen); + sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */ msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */ @@ -1052,6 +1086,8 @@ retry: return h; out_release_sockets: + if (ad.s_listen) + sock_release(ad.s_listen); if (sock.socket) sock_release(sock.socket); if (msock.socket) From 1b6f19740da8e7ed2d1216dc69a972d10de4f0e9 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 8 Jun 2012 15:06:39 +0200 Subject: [PATCH 519/609] drbd: fix access of unallocated pages and kernel panic BUG: unable to handle kernel NULL pointer dereference at (null) ... [] ? _drbd_bm_set_bits+0x151/0x240 [drbd] [] ? receive_bitmap+0x4f8/0xbc0 [drbd] This fixes an off-by-one error in the receive_bitmap() path, if run-length encoded bitmap transfer is enabled. If the bitmap is an exact multiple of PAGE_SIZE, which means the visible capacity of the drbd device is an exact multiple of 128 MiB (for 4k page size), and bitmap compression (use-rle) is enabled (which became default with 8.4), and the very last bit is dirty and reported in an rle compressed bitmap packet, we ended up trying to kmap_atomic a page pointer that does not exist (bitmap->bm_pages[last index + 1]). bug introduced by: Date: Fri Jul 24 15:33:24 2009 +0200 set bits: optimize for complete last word, fix off-by-one-word corner case made effective by: Date: Thu Dec 16 00:32:38 2010 +0100 drbd: get rid of unused debug code Long time ago, we had paranoia code in the bitmap that allocated one extra word, assigned a magic value, and checked on every occasion that the magic value was still unchanged.
That debug code is unused, the extra long word complicates code a bit. Get rid of it. No-one triggered this bug in the last few years, because a large subset of our userbase is unaffected: * typically the last few blocks of a device are not modified frequently, and remain unset * use-rle was disabled by default in drbd < 8.4 * those with slightly "odd" device sizes, or * drbd internal meta data (which will skew the device size slightly, thus makes it harder to have a bug relevant device size) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 65c55ecfeae..b3d55d4b693 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1535,10 +1535,17 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi first_word = 0; spin_lock_irq(&b->bm_lock); } - /* last page (respectively only page, for first page == last page) */ last_word = MLPP(el >> LN2_BPL); - bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); + + /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples). + * ==> e = 32767, el = 32768, last_page = 2, + * and now last_word = 0. + * We do not want to touch last_page in this case, + * as we did not allocate it, it is not present in bitmap->bm_pages. + */ + if (last_word) + bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); /* possibly trailing bits. * example: (e & 63) == 63, el will be e+1. From ab53b90e89eb2421a607655cab426232fdd82f6f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 8 Jun 2012 16:30:30 +0200 Subject: [PATCH 520/609] drbd: fix local read error hung forever The commit drbd: simplify retry path of failed READ requests simplified it too much: it just did not do anything for local read errors. 
Add the missing req_may_be_completed_not_susp() to the READ_COMPLETED_WITH_ERROR case. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 44a7d6ba4e4..d0d516743fb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -460,6 +460,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(!(req->rq_state & RQ_NET_MASK)); __drbd_chk_io_error(mdev, false); + req_may_be_completed_not_susp(req, m); break; case QUEUE_FOR_NET_READ: From 629663c94242cfcff45abed61a019304ab85420c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 8 Jun 2012 16:39:24 +0200 Subject: [PATCH 521/609] drbd: fix wrong assert in completion/retry path of failed local reads Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d0d516743fb..a3617f33826 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -349,7 +349,7 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) * what about (RQ_LOCAL_PENDING | RQ_LOCAL_ABORTED)? */ D_ASSERT(!(s & RQ_LOCAL_PENDING)); - D_ASSERT(s & RQ_NET_DONE); + D_ASSERT((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)); } } req_may_be_done(req); From 9d05e7c4e7069180370ce9c2c121fd6f7810c5a3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 17 Jul 2012 10:05:04 +0200 Subject: [PATCH 522/609] drbd: rename drbd_restart_write to drbd_restart_request Meanwhile, this is used to restart failed READ requests as well. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_req.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9e65bad3e51..843d0af68f9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2434,7 +2434,7 @@ static void do_retry(struct work_struct *ws) } } -void drbd_restart_write(struct drbd_request *req) +void drbd_restart_request(struct drbd_request *req) { unsigned long flags; spin_lock_irqsave(&retry.lock, flags); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index a3617f33826..34e791dfaf8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -144,7 +144,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const } if (s & RQ_POSTPONED) - drbd_restart_write(req); + drbd_restart_request(req); else drbd_req_free(req); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 492f81d3765..f80af27fa5e 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -269,7 +269,7 @@ extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); /* this is in drbd_main.c */ -extern void drbd_restart_write(struct drbd_request *req); +extern void drbd_restart_request(struct drbd_request *req); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ From b379c41ed78e83c4443fca4dbfbc358c19e4f24c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 17 Nov 2011 11:49:46 +0100 Subject: [PATCH 523/609] drbd: transfer log epoch numbers are now per resource cherry-picked from drbd 9 devel branch. 
In preparation of multiple connections, the "barrier number" or "epoch number" needs to be tracked per-resource, not per connection. The sequence number space will not be reset anymore. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 9 +++------ drivers/block/drbd/drbd_req.c | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 5b1789af6cd..d7ca76ce00c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -864,6 +864,7 @@ struct drbd_tconn { /* is a resource from the config file */ spinlock_t epoch_lock; unsigned int epochs; enum write_ordering_e write_ordering; + atomic_t current_tle_nr; /* transfer log epoch number */ unsigned long last_reconnect_jif; struct drbd_thread receiver; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 843d0af68f9..bfe6975ef94 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -208,7 +208,7 @@ static int tl_init(struct drbd_tconn *tconn) INIT_LIST_HEAD(&b->requests); INIT_LIST_HEAD(&b->w.list); b->next = NULL; - b->br_number = 4711; + b->br_number = atomic_inc_return(&tconn->current_tle_nr); b->n_writes = 0; b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ @@ -241,16 +241,13 @@ static void tl_cleanup(struct drbd_tconn *tconn) */ void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new) { - struct drbd_tl_epoch *newest_before; - INIT_LIST_HEAD(&new->requests); INIT_LIST_HEAD(&new->w.list); new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ new->next = NULL; new->n_writes = 0; - newest_before = tconn->newest_tle; - new->br_number = newest_before->br_number+1; + new->br_number = atomic_inc_return(&tconn->current_tle_nr); if (tconn->newest_tle != new) { tconn->newest_tle->next = new; tconn->newest_tle = 
new; @@ -406,7 +403,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) list_splice(&carry_reads, &b->requests); INIT_LIST_HEAD(&b->w.list); b->w.cb = NULL; - b->br_number = net_random(); + b->br_number = atomic_inc_return(&tconn->current_tle_nr); b->n_writes = 0; *pn = b; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 34e791dfaf8..a131174b667 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, */ if (mdev->state.conn >= C_CONNECTED && (s & RQ_NET_SENT) != 0 && - req->epoch == mdev->tconn->newest_tle->br_number) + req->epoch == atomic_read(&mdev->tconn->current_tle_nr)) queue_barrier(mdev); } @@ -518,7 +518,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * just after it grabs the req_lock */ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->tconn->flags) == 0); - req->epoch = mdev->tconn->newest_tle->br_number; + req->epoch = atomic_read(&mdev->tconn->current_tle_nr); /* increment size of current epoch */ mdev->tconn->newest_tle->n_writes++; From 8c0785a5c9a0f2472aff68dc32247be01728c416 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 19 Oct 2011 11:50:57 +0200 Subject: [PATCH 524/609] drbd: allow to dequeue batches of work at a time cherry-picked and adapted from drbd 9 devel branch In 8.4, we still use drbd_queue_work_front(), so in normal operation, we can not dequeue batches, but only single items. Still, followup commits will wake the worker without explicitly queueing a work item, so up() is replaced by a simple wake_up(). 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_worker.c | 88 ++++++++++++++------------------ 3 files changed, 43 insertions(+), 55 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d7ca76ce00c..e84c7b6a6ba 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -735,8 +735,8 @@ enum bm_flag { struct drbd_work_queue { struct list_head q; - struct semaphore s; /* producers up it, worker down()s it */ spinlock_t q_lock; /* to protect the list. */ + wait_queue_head_t q_wait; }; struct drbd_socket { @@ -1832,9 +1832,8 @@ drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) unsigned long flags; spin_lock_irqsave(&q->q_lock, flags); list_add(&w->list, &q->q); - up(&q->s); /* within the spinlock, - see comment near end of drbd_worker() */ spin_unlock_irqrestore(&q->q_lock, flags); + wake_up(&q->q_wait); } static inline void @@ -1843,9 +1842,8 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) unsigned long flags; spin_lock_irqsave(&q->q_lock, flags); list_add_tail(&w->list, &q->q); - up(&q->s); /* within the spinlock, - see comment near end of drbd_worker() */ spin_unlock_irqrestore(&q->q_lock, flags); + wake_up(&q->q_wait); } static inline void wake_asender(struct drbd_tconn *tconn) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bfe6975ef94..f379d33b10a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2535,9 +2535,9 @@ out: static void drbd_init_workqueue(struct drbd_work_queue* wq) { - sema_init(&wq->s, 0); spin_lock_init(&wq->q_lock); INIT_LIST_HEAD(&wq->q); + init_waitqueue_head(&wq->q_wait); } struct drbd_tconn *conn_get_by_name(const char *name) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d7573f4b742..fb2e6c8d45c 100644 --- 
a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1673,6 +1673,23 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) mutex_unlock(mdev->state_mutex); } +bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) +{ + spin_lock_irq(&queue->q_lock); + list_splice_init(&queue->q, work_list); + spin_unlock_irq(&queue->q_lock); + return !list_empty(work_list); +} + +bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list) +{ + spin_lock_irq(&queue->q_lock); + if (!list_empty(&queue->q)) + list_move(queue->q.next, work_list); + spin_unlock_irq(&queue->q_lock); + return !list_empty(work_list); +} + int drbd_worker(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; @@ -1680,15 +1697,21 @@ int drbd_worker(struct drbd_thread *thi) struct drbd_conf *mdev; struct net_conf *nc; LIST_HEAD(work_list); - int vnr, intr = 0; + int vnr; int cork; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - if (down_trylock(&tconn->data.work.s)) { - mutex_lock(&tconn->data.mutex); + /* as long as we use drbd_queue_work_front(), + * we may only dequeue single work items here, not batches. */ + if (list_empty(&work_list)) + dequeue_work_item(&tconn->data.work, &work_list); + /* Still nothing to do? Poke TCP, just in case, + * then wait for new work (or signal). */ + if (list_empty(&work_list)) { + mutex_lock(&tconn->data.mutex); rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); cork = nc ? 
nc->tcp_cork : 0; @@ -1698,15 +1721,16 @@ int drbd_worker(struct drbd_thread *thi) drbd_tcp_uncork(tconn->data.socket); mutex_unlock(&tconn->data.mutex); - intr = down_interruptible(&tconn->data.work.s); + wait_event_interruptible(tconn->data.work.q_wait, + dequeue_work_item(&tconn->data.work, &work_list)); mutex_lock(&tconn->data.mutex); - if (tconn->data.socket && cork) + if (tconn->data.socket && cork) drbd_tcp_cork(tconn->data.socket); mutex_unlock(&tconn->data.mutex); } - if (intr) { + if (signal_pending(current)) { flush_signals(current); if (get_t_state(thi) == RUNNING) { conn_warn(tconn, "Worker got an unexpected signal\n"); @@ -1717,59 +1741,25 @@ int drbd_worker(struct drbd_thread *thi) if (get_t_state(thi) != RUNNING) break; - /* With this break, we have done a down() but not consumed - the entry from the list. The cleanup code takes care of - this... */ - w = NULL; - spin_lock_irq(&tconn->data.work.q_lock); - if (list_empty(&tconn->data.work.q)) { - /* something terribly wrong in our logic. - * we were able to down() the semaphore, - * but the list is empty... doh. - * - * what is the best thing to do now? - * try again from scratch, restarting the receiver, - * asender, whatnot? could break even more ugly, - * e.g. when we are primary, but no good local data. - * - * I'll try to get away just starting over this loop. - */ - conn_warn(tconn, "Work list unexpectedly empty\n"); - spin_unlock_irq(&tconn->data.work.q_lock); - continue; - } - w = list_entry(tconn->data.work.q.next, struct drbd_work, list); - list_del_init(&w->list); - spin_unlock_irq(&tconn->data.work.q_lock); - - if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) { - /* dev_warn(DEV, "worker: a callback failed! 
\n"); */ + while (!list_empty(&work_list)) { + w = list_first_entry(&work_list, struct drbd_work, list); + list_del_init(&w->list); + if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0) + continue; if (tconn->cstate >= C_WF_REPORT_PARAMS) conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); } } - spin_lock_irq(&tconn->data.work.q_lock); - while (!list_empty(&tconn->data.work.q)) { - list_splice_init(&tconn->data.work.q, &work_list); - spin_unlock_irq(&tconn->data.work.q_lock); - + do { while (!list_empty(&work_list)) { - w = list_entry(work_list.next, struct drbd_work, list); + w = list_first_entry(&work_list, struct drbd_work, list); list_del_init(&w->list); w->cb(w, 1); } - - spin_lock_irq(&tconn->data.work.q_lock); - } - sema_init(&tconn->data.work.s, 0); - /* DANGEROUS race: if someone did queue his work within the spinlock, - * but up() ed outside the spinlock, we could get an up() on the - * semaphore without corresponding list entry. - * So don't do that. - */ - spin_unlock_irq(&tconn->data.work.q_lock); + dequeue_work_batch(&tconn->data.work, &work_list); + } while (!list_empty(&work_list)); rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { From d5b27b01f17ef1f0badc45f9eea521be3457c9cb Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 14 Nov 2011 15:42:37 +0100 Subject: [PATCH 525/609] drbd: move the drbd_work_queue from drbd_socket to drbd_connection cherry-picked and adapted from drbd 9 devel branch In 8.4, we don't distinguish between "resource work" and "connection work" yet, we have one worker for both, as we still have only one connection. We only ever used the "data.work", no need to keep the "meta.work" around. Move tconn->data.work to tconn->sender_work. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 4 ++-- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 16 ++++++---------- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_req.c | 12 ++++++------ drivers/block/drbd/drbd_state.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 16 ++++++++-------- 7 files changed, 28 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 83d48d210b6..f500dc5cdf5 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -485,7 +485,7 @@ static int al_write_transaction(struct drbd_conf *mdev) init_completion(&al_work.event); al_work.w.cb = w_al_write_transaction; al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w); wait_for_completion(&al_work.event); return al_work.err; @@ -645,7 +645,7 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, udw->enr = ext->lce.lc_number; udw->w.cb = w_update_odbm; udw->w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &udw->w); + drbd_queue_work_front(&mdev->tconn->sender_work, &udw->w); } else { dev_warn(DEV, "Could not kmalloc an udw\n"); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e84c7b6a6ba..c0d0de54ae5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -740,7 +740,6 @@ struct drbd_work_queue { }; struct drbd_socket { - struct drbd_work_queue work; struct mutex mutex; struct socket *socket; /* this way we get our @@ -871,6 +870,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_thread worker; struct drbd_thread asender; cpumask_var_t cpu_mask; + struct drbd_work_queue sender_work; }; struct drbd_conf { @@ -2228,7 +2228,7 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) 
wake_up(&mdev->misc_wait); if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); + drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); } } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f379d33b10a..7e37149684e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -379,7 +379,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) set_bit(CREATE_BARRIER, &tconn->flags); } - drbd_queue_work(&tconn->data.work, &b->w); + drbd_queue_work(&tconn->sender_work, &b->w); } pn = &b->next; } else { @@ -2173,8 +2173,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->net_ee)); D_ASSERT(list_empty(&mdev->resync_reads)); - D_ASSERT(list_empty(&mdev->tconn->data.work.q)); - D_ASSERT(list_empty(&mdev->tconn->meta.work.q)); + D_ASSERT(list_empty(&mdev->tconn->sender_work.q)); D_ASSERT(list_empty(&mdev->resync_work.list)); D_ASSERT(list_empty(&mdev->unplug_work.list)); D_ASSERT(list_empty(&mdev->go_diskless.list)); @@ -2349,7 +2348,6 @@ void drbd_minor_destroy(struct kref *kref) /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); - D_ASSERT(list_empty(&mdev->tconn->data.work.q)); /* end paranoia asserts */ /* cleanup stuff that may have been allocated during @@ -2700,10 +2698,8 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) init_waitqueue_head(&tconn->ping_wait); idr_init(&tconn->volumes); - drbd_init_workqueue(&tconn->data.work); + drbd_init_workqueue(&tconn->sender_work); mutex_init(&tconn->data.mutex); - - drbd_init_workqueue(&tconn->meta.work); mutex_init(&tconn->meta.mutex); drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); @@ -3356,7 +3352,7 @@ void drbd_go_diskless(struct drbd_conf *mdev) { D_ASSERT(mdev->state.disk == D_FAILED); if 
(!test_and_set_bit(GO_DISKLESS, &mdev->flags)) - drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless); + drbd_queue_work(&mdev->tconn->sender_work, &mdev->go_diskless); } /** @@ -3394,7 +3390,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, set_bit(BITMAP_IO, &mdev->flags); if (atomic_read(&mdev->ap_bio_cnt) == 0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); + drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); } spin_unlock_irq(&mdev->tconn->req_lock); } @@ -3452,7 +3448,7 @@ static void md_sync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; - drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); + drbd_queue_work_front(&mdev->tconn->sender_work, &mdev->md_sync_work); } static int w_md_sync(struct drbd_work *w, int unused) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9aac1c4033c..34fc33b5eb4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4413,7 +4413,7 @@ void conn_flush_workqueue(struct drbd_tconn *tconn) barr.w.cb = w_prev_work_done; barr.w.tconn = tconn; init_completion(&barr.done); - drbd_queue_work(&tconn->data.work, &barr.w); + drbd_queue_work(&tconn->sender_work, &barr.w); wait_for_completion(&barr.done); } @@ -5147,7 +5147,7 @@ static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) if (w) { w->cb = w_ov_finished; w->mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, w); + drbd_queue_work(&mdev->tconn->sender_work, w); } else { dev_err(DEV, "kmalloc(w) failed."); ov_out_of_sync_print(mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index a131174b667..e609557a942 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -170,7 +170,7 @@ static void queue_barrier(struct drbd_conf *mdev) * dec_ap_pending will be done in 
got_BarrierAck * or (on connection loss) in tl_clear. */ inc_ap_pending(mdev); - drbd_queue_work(&tconn->data.work, &b->w); + drbd_queue_work(&tconn->sender_work, &b->w); set_bit(CREATE_BARRIER, &tconn->flags); } @@ -483,7 +483,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT((req->rq_state & RQ_LOCAL_MASK) == 0); req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_read_req; - drbd_queue_work(&mdev->tconn->data.work, &req->w); + drbd_queue_work(&mdev->tconn->sender_work, &req->w); break; case QUEUE_FOR_NET_WRITE: @@ -527,7 +527,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_dblock; - drbd_queue_work(&mdev->tconn->data.work, &req->w); + drbd_queue_work(&mdev->tconn->sender_work, &req->w); /* close the epoch, in case it outgrew the limit */ rcu_read_lock(); @@ -542,7 +542,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case QUEUE_FOR_SEND_OOS: req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_out_of_sync; - drbd_queue_work(&mdev->tconn->data.work, &req->w); + drbd_queue_work(&mdev->tconn->sender_work, &req->w); break; case READ_RETRY_REMOTE_CANCELED: @@ -682,7 +682,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, get_ldev(mdev); req->w.cb = w_restart_disk_io; - drbd_queue_work(&mdev->tconn->data.work, &req->w); + drbd_queue_work(&mdev->tconn->sender_work, &req->w); break; case RESEND: @@ -692,7 +692,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, During connection handshake, we ensure that the peer was not rebooted. */ if (!(req->rq_state & RQ_NET_OK)) { if (req->w.cb) { - drbd_queue_work(&mdev->tconn->data.work, &req->w); + drbd_queue_work(&mdev->tconn->sender_work, &req->w); rv = req->rq_state & RQ_WRITE ? 
MR_WRITE : MR_READ; } break; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index dd618b5346f..84a5072d737 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1090,7 +1090,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ascw->w.cb = w_after_state_ch; ascw->w.mdev = mdev; ascw->done = done; - drbd_queue_work(&mdev->tconn->data.work, &ascw->w); + drbd_queue_work(&mdev->tconn->sender_work, &ascw->w); } else { dev_err(DEV, "Could not kmalloc an ascw\n"); } @@ -1764,7 +1764,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ acscw->w.cb = w_after_conn_state_ch; kref_get(&tconn->kref); acscw->w.tconn = tconn; - drbd_queue_work(&tconn->data.work, &acscw->w); + drbd_queue_work(&tconn->sender_work, &acscw->w); } else { conn_err(tconn, "Could not kmalloc an acscw\n"); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index fb2e6c8d45c..39ece3a2f53 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -109,7 +109,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - drbd_queue_work(&mdev->tconn->data.work, &peer_req->w); + drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); put_ldev(mdev); } @@ -401,7 +401,7 @@ void resync_timer_fn(unsigned long data) struct drbd_conf *mdev = (struct drbd_conf *) data; if (list_empty(&mdev->resync_work.list)) - drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work); + drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work); } static void fifo_set(struct fifo_buffer *fb, int value) @@ -783,7 +783,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) if (w) { w->cb = w_resync_finished; w->mdev = mdev; - drbd_queue_work(&mdev->tconn->data.work, w); + drbd_queue_work(&mdev->tconn->sender_work, w); return 1; } dev_err(DEV, 
"Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); @@ -1484,7 +1484,7 @@ void start_resync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; - drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); + drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work); } int w_start_resync(struct drbd_work *w, int cancel) @@ -1706,7 +1706,7 @@ int drbd_worker(struct drbd_thread *thi) /* as long as we use drbd_queue_work_front(), * we may only dequeue single work items here, not batches. */ if (list_empty(&work_list)) - dequeue_work_item(&tconn->data.work, &work_list); + dequeue_work_item(&tconn->sender_work, &work_list); /* Still nothing to do? Poke TCP, just in case, * then wait for new work (or signal). */ @@ -1721,8 +1721,8 @@ int drbd_worker(struct drbd_thread *thi) drbd_tcp_uncork(tconn->data.socket); mutex_unlock(&tconn->data.mutex); - wait_event_interruptible(tconn->data.work.q_wait, - dequeue_work_item(&tconn->data.work, &work_list)); + wait_event_interruptible(tconn->sender_work.q_wait, + dequeue_work_item(&tconn->sender_work, &work_list)); mutex_lock(&tconn->data.mutex); if (tconn->data.socket && cork) @@ -1758,7 +1758,7 @@ int drbd_worker(struct drbd_thread *thi) list_del_init(&w->list); w->cb(w, 1); } - dequeue_work_batch(&tconn->data.work, &work_list); + dequeue_work_batch(&tconn->sender_work, &work_list); } while (!list_empty(&work_list)); rcu_read_lock(); From b6dd1a89767bc33e9c98b3195f8925b46c5c95f3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 28 Nov 2011 15:04:49 +0100 Subject: [PATCH 526/609] drbd: remove struct drbd_tl_epoch objects (barrier works) cherry-picked and adapted from drbd 9 devel branch DRBD requests (struct drbd_request) are already on the per resource transfer log list, and carry their epoch number. We do not need to additionally link them on other ring lists in other structs. 
The drbd sender thread can recognize by itself when to send a P_BARRIER, by tracking the currently processed epoch, and how many writes have been processed for that epoch. If the epoch of the request to be processed does not match the currently processed epoch, and any writes have been processed in it, a P_BARRIER for this last processed epoch is sent out first. The new epoch then becomes the currently processed epoch. To not get stuck in drbd_al_begin_io() waiting for P_BARRIER_ACK, the sender thread also needs to handle the case when the current epoch was closed already, but no new requests are queued yet, and send out P_BARRIER as soon as possible. This is done by comparing the per resource "current transfer log epoch" (tconn->current_tle_nr) with the per connection "currently processed epoch number" (tconn->send.current_epoch_nr), while waiting for new requests to be processed in wait_for_work(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 45 +++-- drivers/block/drbd/drbd_main.c | 312 ++++++----------------------- drivers/block/drbd/drbd_nl.c | 8 + drivers/block/drbd/drbd_receiver.c | 1 + drivers/block/drbd/drbd_req.c | 157 ++++----------- drivers/block/drbd/drbd_worker.c | 194 +++++++++++++----- 6 files changed, 289 insertions(+), 428 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c0d0de54ae5..309c121557a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -562,12 +562,16 @@ struct drbd_request { struct bio *private_bio; struct drbd_interval i; - unsigned int epoch; /* barrier_nr */ - /* barrier_nr: used to check on "completion" whether this req was in + /* epoch: used to check on "completion" whether this req was in * the current epoch, and we therefore have to close it, - * starting a new epoch... + * causing a p_barrier packet to be send, starting a new epoch.
+ * + * This corresponds to "barrier" in struct p_barrier[_ack], + * and to "barrier_nr" in struct drbd_epoch (and various + * comments/function parameters/local variable names). */ + unsigned int epoch; struct list_head tl_requests; /* ring list in the transfer log */ struct bio *master_bio; /* master bio pointer */ @@ -575,14 +579,6 @@ struct drbd_request { unsigned long start_time; }; -struct drbd_tl_epoch { - struct drbd_work w; - struct list_head requests; /* requests before */ - struct drbd_tl_epoch *next; /* pointer to the next barrier */ - unsigned int br_number; /* the barriers identifier. */ - int n_writes; /* number of requests attached before this barrier */ -}; - struct drbd_epoch { struct drbd_tconn *tconn; struct list_head list; @@ -845,11 +841,8 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned int ko_count; spinlock_t req_lock; - struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ - struct drbd_tl_epoch *newest_tle; - struct drbd_tl_epoch *oldest_tle; - struct list_head out_of_sequence_requests; - struct list_head barrier_acked_requests; + + struct list_head transfer_log; /* all requests not yet fully processed */ struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ @@ -859,18 +852,36 @@ struct drbd_tconn { /* is a resource from the config file */ void *int_dig_in; void *int_dig_vv; + /* receiver side */ struct drbd_epoch *current_epoch; spinlock_t epoch_lock; unsigned int epochs; enum write_ordering_e write_ordering; atomic_t current_tle_nr; /* transfer log epoch number */ + unsigned current_tle_writes; /* writes seen within this tl epoch */ unsigned long last_reconnect_jif; struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; cpumask_var_t cpu_mask; + + /* sender side */ struct drbd_work_queue sender_work; + + struct { + /* whether this sender thread + * has processed a single write yet. 
*/ + bool seen_any_write_yet; + + /* Which barrier number to send with the next P_BARRIER */ + int current_epoch_nr; + + /* how many write requests have been sent + * with req->epoch == current_epoch_nr. + * If none, no P_BARRIER will be sent. */ + unsigned current_epoch_writes; + } send; }; struct drbd_conf { @@ -1054,7 +1065,6 @@ extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_tconn *); -extern void _tl_add_barrier(struct drbd_tconn *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_tconn *tconn); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); @@ -1460,7 +1470,6 @@ extern int w_resync_timer(struct drbd_work *, int); extern int w_send_write_hint(struct drbd_work *, int); extern int w_make_resync_request(struct drbd_work *, int); extern int w_send_dblock(struct drbd_work *, int); -extern int w_send_barrier(struct drbd_work *, int); extern int w_send_read_req(struct drbd_work *, int); extern int w_prev_work_done(struct drbd_work *, int); extern int w_e_reissue(struct drbd_work *, int); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7e37149684e..8c6c48e363c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -188,147 +188,75 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) #endif /** - * DOC: The transfer log - * - * The transfer log is a single linked list of &struct drbd_tl_epoch objects. - * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail - * of the list. There is always at least one &struct drbd_tl_epoch object. - * - * Each &struct drbd_tl_epoch has a circular double linked list of requests - * attached. 
- */ -static int tl_init(struct drbd_tconn *tconn) -{ - struct drbd_tl_epoch *b; - - /* during device minor initialization, we may well use GFP_KERNEL */ - b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL); - if (!b) - return 0; - INIT_LIST_HEAD(&b->requests); - INIT_LIST_HEAD(&b->w.list); - b->next = NULL; - b->br_number = atomic_inc_return(&tconn->current_tle_nr); - b->n_writes = 0; - b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - - tconn->oldest_tle = b; - tconn->newest_tle = b; - INIT_LIST_HEAD(&tconn->out_of_sequence_requests); - INIT_LIST_HEAD(&tconn->barrier_acked_requests); - - return 1; -} - -static void tl_cleanup(struct drbd_tconn *tconn) -{ - if (tconn->oldest_tle != tconn->newest_tle) - conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n"); - if (!list_empty(&tconn->out_of_sequence_requests)) - conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n"); - kfree(tconn->oldest_tle); - tconn->oldest_tle = NULL; - kfree(tconn->unused_spare_tle); - tconn->unused_spare_tle = NULL; -} - -/** - * _tl_add_barrier() - Adds a barrier to the transfer log - * @mdev: DRBD device. - * @new: Barrier to be added before the current head of the TL. - * - * The caller must hold the req_lock. - */ -void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new) -{ - INIT_LIST_HEAD(&new->requests); - INIT_LIST_HEAD(&new->w.list); - new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - new->next = NULL; - new->n_writes = 0; - - new->br_number = atomic_inc_return(&tconn->current_tle_nr); - if (tconn->newest_tle != new) { - tconn->newest_tle->next = new; - tconn->newest_tle = new; - } -} - -/** - * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL - * @mdev: DRBD device. + * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch + * @tconn: DRBD connection. 
* @barrier_nr: Expected identifier of the DRBD write barrier packet. * @set_size: Expected number of requests before that barrier. * * In case the passed barrier_nr or set_size does not match the oldest - * &struct drbd_tl_epoch objects this function will cause a termination - * of the connection. + * epoch of not yet barrier-acked requests, this function will cause a + * termination of the connection. */ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, unsigned int set_size) { - struct drbd_conf *mdev; - struct drbd_tl_epoch *b, *nob; /* next old barrier */ - struct list_head *le, *tle; struct drbd_request *r; + struct drbd_request *req = NULL; + int expect_epoch = 0; + int expect_size = 0; spin_lock_irq(&tconn->req_lock); - b = tconn->oldest_tle; + /* find latest not yet barrier-acked write request, + * count writes in its epoch. */ + list_for_each_entry(r, &tconn->transfer_log, tl_requests) { + const unsigned long s = r->rq_state; + if (!req) { + if (!(s & RQ_WRITE)) + continue; + if (!(s & RQ_NET_MASK)) + continue; + if (s & RQ_NET_DONE) + continue; + req = r; + expect_epoch = req->epoch; + expect_size ++; + } else { + if (r->epoch != expect_epoch) + break; + if (!(s & RQ_WRITE)) + continue; + /* if (s & RQ_DONE): not expected */ + /* if (!(s & RQ_NET_MASK)): not expected */ + expect_size++; + } + } /* first some paranoia code */ - if (b == NULL) { + if (req == NULL) { conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", barrier_nr); goto bail; } - if (b->br_number != barrier_nr) { + if (expect_epoch != barrier_nr) { conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n", - barrier_nr, b->br_number); + barrier_nr, expect_epoch); goto bail; } - if (b->n_writes != set_size) { + + if (expect_size != set_size) { conn_err(tconn, "BAD! 
BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", - barrier_nr, set_size, b->n_writes); + barrier_nr, set_size, expect_size); goto bail; } /* Clean up list of requests processed during current epoch */ - list_for_each_safe(le, tle, &b->requests) { - r = list_entry(le, struct drbd_request, tl_requests); - _req_mod(r, BARRIER_ACKED); + list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) { + if (req->epoch != expect_epoch) + break; + _req_mod(req, BARRIER_ACKED); } - /* There could be requests on the list waiting for completion - of the write to the local disk. To avoid corruptions of - slab's data structures we have to remove the lists head. - - Also there could have been a barrier ack out of sequence, overtaking - the write acks - which would be a bug and violating write ordering. - To not deadlock in case we lose connection while such requests are - still pending, we need some way to find them for the - _req_mode(CONNECTION_LOST_WHILE_PENDING). - - These have been list_move'd to the out_of_sequence_requests list in - _req_mod(, BARRIER_ACKED) above. - */ - list_splice_init(&b->requests, &tconn->barrier_acked_requests); - mdev = b->w.mdev; - - nob = b->next; - if (test_and_clear_bit(CREATE_BARRIER, &tconn->flags)) { - _tl_add_barrier(tconn, b); - if (nob) - tconn->oldest_tle = nob; - /* if nob == NULL b was the only barrier, and becomes the new - barrier. Therefore tconn->oldest_tle points already to b */ - } else { - D_ASSERT(nob != NULL); - tconn->oldest_tle = nob; - kfree(b); - } - spin_unlock_irq(&tconn->req_lock); - dec_ap_pending(mdev); return; @@ -346,91 +274,20 @@ bail: * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, * RESTART_FROZEN_DISK_IO. 
*/ +/* must hold resource->req_lock */ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) { - struct drbd_tl_epoch *b, *tmp, **pn; - struct list_head *le, *tle, carry_reads; - struct drbd_request *req; - int rv, n_writes, n_reads; + struct drbd_request *req, *r; - b = tconn->oldest_tle; - pn = &tconn->oldest_tle; - while (b) { - n_writes = 0; - n_reads = 0; - INIT_LIST_HEAD(&carry_reads); - list_for_each_safe(le, tle, &b->requests) { - req = list_entry(le, struct drbd_request, tl_requests); - rv = _req_mod(req, what); + list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) + _req_mod(req, what); +} - if (rv & MR_WRITE) - n_writes++; - if (rv & MR_READ) - n_reads++; - } - tmp = b->next; - - if (n_writes) { - if (what == RESEND) { - b->n_writes = n_writes; - if (b->w.cb == NULL) { - b->w.cb = w_send_barrier; - inc_ap_pending(b->w.mdev); - set_bit(CREATE_BARRIER, &tconn->flags); - } - - drbd_queue_work(&tconn->sender_work, &b->w); - } - pn = &b->next; - } else { - if (n_reads) - list_add(&carry_reads, &b->requests); - /* there could still be requests on that ring list, - * in case local io is still pending */ - list_del(&b->requests); - - /* dec_ap_pending corresponding to queue_barrier. - * the newest barrier may not have been queued yet, - * in which case w.cb is still NULL. */ - if (b->w.cb != NULL) - dec_ap_pending(b->w.mdev); - - if (b == tconn->newest_tle) { - /* recycle, but reinit! */ - if (tmp != NULL) - conn_err(tconn, "ASSERT FAILED tmp == NULL"); - INIT_LIST_HEAD(&b->requests); - list_splice(&carry_reads, &b->requests); - INIT_LIST_HEAD(&b->w.list); - b->w.cb = NULL; - b->br_number = atomic_inc_return(&tconn->current_tle_nr); - b->n_writes = 0; - - *pn = b; - break; - } - *pn = tmp; - kfree(b); - } - b = tmp; - list_splice(&carry_reads, &b->requests); - } - - /* Actions operating on the disk state, also want to work on - requests that got barrier acked. 
*/ - switch (what) { - case FAIL_FROZEN_DISK_IO: - case RESTART_FROZEN_DISK_IO: - list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { - req = list_entry(le, struct drbd_request, tl_requests); - _req_mod(req, what); - } - case CONNECTION_LOST_WHILE_PENDING: - case RESEND: - break; - default: - conn_err(tconn, "what = %d in _tl_restart()\n", what); - } +void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) +{ + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, what); + spin_unlock_irq(&tconn->req_lock); } /** @@ -443,36 +300,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) */ void tl_clear(struct drbd_tconn *tconn) { - struct list_head *le, *tle; - struct drbd_request *r; - - spin_lock_irq(&tconn->req_lock); - - _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); - - /* we expect this list to be empty. */ - if (!list_empty(&tconn->out_of_sequence_requests)) - conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n"); - - /* but just in case, clean it up anyways! */ - list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) { - r = list_entry(le, struct drbd_request, tl_requests); - /* It would be nice to complete outside of spinlock. - * But this is easier for now. 
*/ - _req_mod(r, CONNECTION_LOST_WHILE_PENDING); - } - - /* ensure bit indicating barrier is required is clear */ - clear_bit(CREATE_BARRIER, &tconn->flags); - - spin_unlock_irq(&tconn->req_lock); -} - -void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) -{ - spin_lock_irq(&tconn->req_lock); - _tl_restart(tconn, what); - spin_unlock_irq(&tconn->req_lock); + tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); } /** @@ -482,31 +310,16 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) void tl_abort_disk_io(struct drbd_conf *mdev) { struct drbd_tconn *tconn = mdev->tconn; - struct drbd_tl_epoch *b; - struct list_head *le, *tle; - struct drbd_request *req; + struct drbd_request *req, *r; spin_lock_irq(&tconn->req_lock); - b = tconn->oldest_tle; - while (b) { - list_for_each_safe(le, tle, &b->requests) { - req = list_entry(le, struct drbd_request, tl_requests); - if (!(req->rq_state & RQ_LOCAL_PENDING)) - continue; - if (req->w.mdev == mdev) - _req_mod(req, ABORT_DISK_IO); - } - b = b->next; - } - - list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { - req = list_entry(le, struct drbd_request, tl_requests); + list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) { if (!(req->rq_state & RQ_LOCAL_PENDING)) continue; - if (req->w.mdev == mdev) - _req_mod(req, ABORT_DISK_IO); + if (req->w.mdev != mdev) + continue; + _req_mod(req, ABORT_DISK_IO); } - spin_unlock_irq(&tconn->req_lock); } @@ -2680,17 +2493,21 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) if (set_resource_options(tconn, res_opts)) goto fail; - if (!tl_init(tconn)) - goto fail; - tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); if (!tconn->current_epoch) goto fail; + + INIT_LIST_HEAD(&tconn->transfer_log); + INIT_LIST_HEAD(&tconn->current_epoch->list); tconn->epochs = 1; spin_lock_init(&tconn->epoch_lock); tconn->write_ordering = WO_bdev_flush; + tconn->send.seen_any_write_yet = false; + 
tconn->send.current_epoch_nr = 0; + tconn->send.current_epoch_writes = 0; + tconn->cstate = C_STANDALONE; mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); @@ -2713,7 +2530,6 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) fail: kfree(tconn->current_epoch); - tl_cleanup(tconn); free_cpumask_var(tconn->cpu_mask); drbd_free_socket(&tconn->meta); drbd_free_socket(&tconn->data); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c5d4fac1a11..bbc5c2f4a9b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -622,6 +622,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) /* Wait until nothing is on the fly :) */ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); + /* FIXME also wait for all pending P_BARRIER_ACK? */ + if (new_role == R_SECONDARY) { set_disk_ro(mdev->vdisk, true); if (get_ldev(mdev)) { @@ -1436,6 +1438,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_suspend_io(mdev); /* also wait for the last barrier ack. */ + /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171 + * We need a way to either ignore barrier acks for barriers sent before a device + * was attached, or a way to wait for all pending barrier acks to come in. + * As barriers are counted per resource, + * we'd need to suspend io on all devices of a resource. 
+ */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev)); /* and for any other previously queued work */ drbd_flush_workqueue(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 34fc33b5eb4..7fe6b01618d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4451,6 +4451,7 @@ static void conn_disconnect(struct drbd_tconn *tconn) conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n"); /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ atomic_set(&tconn->current_epoch->epoch_size, 0); + tconn->send.seen_any_write_yet = false; conn_info(tconn, "Connection closed\n"); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e609557a942..ca28b56b7a2 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -149,46 +149,16 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const drbd_req_free(req); } -static void queue_barrier(struct drbd_conf *mdev) -{ - struct drbd_tl_epoch *b; - struct drbd_tconn *tconn = mdev->tconn; - - /* We are within the req_lock. Once we queued the barrier for sending, - * we set the CREATE_BARRIER bit. It is cleared as soon as a new - * barrier/epoch object is added. This is the only place this bit is - * set. It indicates that the barrier for this epoch is already queued, - * and no new epoch has been created yet. */ - if (test_bit(CREATE_BARRIER, &tconn->flags)) - return; - - b = tconn->newest_tle; - b->w.cb = w_send_barrier; - b->w.mdev = mdev; - /* inc_ap_pending done here, so we won't - * get imbalanced on connection loss. - * dec_ap_pending will be done in got_BarrierAck - * or (on connection loss) in tl_clear. 
*/ - inc_ap_pending(mdev); - drbd_queue_work(&tconn->sender_work, &b->w); - set_bit(CREATE_BARRIER, &tconn->flags); +static void wake_all_senders(struct drbd_tconn *tconn) { + wake_up(&tconn->sender_work.q_wait); } -static void _about_to_complete_local_write(struct drbd_conf *mdev, - struct drbd_request *req) +/* must hold resource->req_lock */ +static void start_new_tl_epoch(struct drbd_tconn *tconn) { - const unsigned long s = req->rq_state; - - /* Before we can signal completion to the upper layers, - * we may need to close the current epoch. - * We can skip this, if this request has not even been sent, because we - * did not have a fully established connection yet/anymore, during - * bitmap exchange, or while we are C_AHEAD due to congestion policy. - */ - if (mdev->state.conn >= C_CONNECTED && - (s & RQ_NET_SENT) != 0 && - req->epoch == atomic_read(&mdev->tconn->current_tle_nr)) - queue_barrier(mdev); + tconn->current_tle_writes = 0; + atomic_inc(&tconn->current_tle_nr); + wake_all_senders(tconn); } void complete_master_bio(struct drbd_conf *mdev, @@ -320,9 +290,16 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) } else if (!(s & RQ_POSTPONED)) D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); - /* for writes we need to do some extra housekeeping */ - if (rw == WRITE) - _about_to_complete_local_write(mdev, req); + /* Before we can signal completion to the upper layers, + * we may need to close the current transfer log epoch. + * We are within the request lock, so we can simply compare + * the request epoch number with the current transfer log + * epoch number. If they match, increase the current_tle_nr, + * and reset the transfer log epoch write_cnt. + */ + if (rw == WRITE && + req->epoch == atomic_read(&mdev->tconn->current_tle_nr)) + start_new_tl_epoch(mdev->tconn); /* Update disk stats */ _drbd_end_io_acct(mdev, req); @@ -514,15 +491,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * hurting performance. 
*/ set_bit(UNPLUG_REMOTE, &mdev->flags); - /* see __drbd_make_request, - * just after it grabs the req_lock */ - D_ASSERT(test_bit(CREATE_BARRIER, &mdev->tconn->flags) == 0); - - req->epoch = atomic_read(&mdev->tconn->current_tle_nr); - - /* increment size of current epoch */ - mdev->tconn->newest_tle->n_writes++; - /* queue work item to send data */ D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_NET_QUEUED; @@ -534,8 +502,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, nc = rcu_dereference(mdev->tconn->net_conf); p = nc->max_epoch_size; rcu_read_unlock(); - if (mdev->tconn->newest_tle->n_writes >= p) - queue_barrier(mdev); + if (mdev->tconn->current_tle_writes >= p) + start_new_tl_epoch(mdev->tconn); break; @@ -692,6 +660,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, During connection handshake, we ensure that the peer was not rebooted. */ if (!(req->rq_state & RQ_NET_OK)) { if (req->w.cb) { + /* w.cb expected to be w_send_dblock, or w_send_read_req */ drbd_queue_work(&mdev->tconn->sender_work, &req->w); rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; } @@ -708,7 +677,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * this is bad, because if the connection is lost now, * we won't be able to clean them up... 
*/ dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); - list_move(&req->tl_requests, &mdev->tconn->out_of_sequence_requests); } if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; @@ -835,7 +803,6 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s const int rw = bio_rw(bio); const int size = bio->bi_size; const sector_t sector = bio->bi_sector; - struct drbd_tl_epoch *b = NULL; struct drbd_request *req; struct net_conf *nc; int local, remote, send_oos = 0; @@ -916,24 +883,6 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s goto fail_free_complete; } - /* For WRITE request, we have to make sure that we have an - * unused_spare_tle, in case we need to start a new epoch. - * I try to be smart and avoid to pre-allocate always "just in case", - * but there is a race between testing the bit and pointer outside the - * spinlock, and grabbing the spinlock. - * if we lost that race, we retry. */ - if (rw == WRITE && (remote || send_oos) && - mdev->tconn->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->tconn->flags)) { -allocate_barrier: - b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); - if (!b) { - dev_err(DEV, "Failed to alloc barrier.\n"); - err = -ENOMEM; - goto fail_free_complete; - } - } - /* GOOD, everything prepared, grab the spin_lock */ spin_lock_irq(&mdev->tconn->req_lock); @@ -969,42 +918,9 @@ allocate_barrier: } } - if (b && mdev->tconn->unused_spare_tle == NULL) { - mdev->tconn->unused_spare_tle = b; - b = NULL; - } - if (rw == WRITE && (remote || send_oos) && - mdev->tconn->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->tconn->flags)) { - /* someone closed the current epoch - * while we were grabbing the spinlock */ - spin_unlock_irq(&mdev->tconn->req_lock); - goto allocate_barrier; - } - - /* Update disk stats */ _drbd_start_io_acct(mdev, req, bio); - /* _maybe_start_new_epoch(mdev); - * If we need to generate a write barrier packet, we 
have to add the - * new epoch (barrier) object, and queue the barrier packet for sending, - * and queue the req's data after it _within the same lock_, otherwise - * we have race conditions were the reorder domains could be mixed up. - * - * Even read requests may start a new epoch and queue the corresponding - * barrier packet. To get the write ordering right, we only have to - * make sure that, if this is a write request and it triggered a - * barrier packet, this request is queued within the same spinlock. */ - if ((remote || send_oos) && mdev->tconn->unused_spare_tle && - test_and_clear_bit(CREATE_BARRIER, &mdev->tconn->flags)) { - _tl_add_barrier(mdev->tconn, mdev->tconn->unused_spare_tle); - mdev->tconn->unused_spare_tle = NULL; - } else { - D_ASSERT(!(remote && rw == WRITE && - test_bit(CREATE_BARRIER, &mdev->tconn->flags))); - } - /* NOTE * Actually, 'local' may be wrong here already, since we may have failed * to write to the meta data, and may become wrong anytime because of @@ -1025,7 +941,12 @@ allocate_barrier: if (local) _req_mod(req, TO_BE_SUBMITTED); - list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests); + /* which transfer log epoch does this belong to? */ + req->epoch = atomic_read(&mdev->tconn->current_tle_nr); + if (rw == WRITE) + mdev->tconn->current_tle_writes++; + + list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log); /* NOTE remote first: to get the concurrent write detection right, * we must register the request before start of local IO. 
*/ @@ -1059,7 +980,9 @@ allocate_barrier: } if (congested) { - queue_barrier(mdev); /* last barrier, after mirrored writes */ + if (mdev->tconn->current_tle_writes) + /* start a new epoch for non-mirrored writes */ + start_new_tl_epoch(mdev->tconn); if (nc->on_congestion == OC_PULL_AHEAD) _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); @@ -1070,7 +993,6 @@ allocate_barrier: rcu_read_unlock(); spin_unlock_irq(&mdev->tconn->req_lock); - kfree(b); /* if someone else has beaten us to it... */ if (local) { req->private_bio->bi_bdev = mdev->ldev->backing_bdev; @@ -1108,7 +1030,6 @@ fail_and_free_req: drbd_req_free(req); dec_ap_bio(mdev); - kfree(b); return ret; } @@ -1164,12 +1085,23 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct return limit; } +struct drbd_request *find_oldest_request(struct drbd_tconn *tconn) +{ + /* Walk the transfer log, + * and find the oldest not yet completed request */ + struct drbd_request *r; + list_for_each_entry(r, &tconn->transfer_log, tl_requests) { + if (r->rq_state & (RQ_NET_PENDING|RQ_LOCAL_PENDING)) + return r; + } + return NULL; +} + void request_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; struct drbd_tconn *tconn = mdev->tconn; struct drbd_request *req; /* oldest request */ - struct list_head *le; struct net_conf *nc; unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ unsigned long now; @@ -1193,16 +1125,13 @@ void request_timer_fn(unsigned long data) now = jiffies; spin_lock_irq(&tconn->req_lock); - le = &tconn->oldest_tle->requests; - if (list_empty(le)) { + req = find_oldest_request(tconn); + if (!req) { spin_unlock_irq(&tconn->req_lock); mod_timer(&mdev->request_timer, now + et); return; } - le = le->prev; - req = list_entry(le, struct drbd_request, tl_requests); - /* The request is considered timed out, if * - we have some effective timeout from the configuration, * with above state restrictions applied, diff --git 
a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 39ece3a2f53..66be3910e8d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1210,34 +1210,25 @@ int w_prev_work_done(struct drbd_work *w, int cancel) return 0; } -int w_send_barrier(struct drbd_work *w, int cancel) +/* FIXME + * We need to track the number of pending barrier acks, + * and to be able to wait for them. + * See also comment in drbd_adm_attach before drbd_suspend_io. + */ +int drbd_send_barrier(struct drbd_tconn *tconn) { - struct drbd_socket *sock; - struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); - struct drbd_conf *mdev = w->mdev; struct p_barrier *p; + struct drbd_socket *sock; - /* really avoid racing with tl_clear. w.cb may have been referenced - * just before it was reassigned and re-queued, so double check that. - * actually, this race was harmless, since we only try to send the - * barrier packet here, and otherwise do nothing with the object. - * but compare with the head of w_clear_epoch */ - spin_lock_irq(&mdev->tconn->req_lock); - if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED) - cancel = 1; - spin_unlock_irq(&mdev->tconn->req_lock); - if (cancel) - return 0; - - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &tconn->data; + p = conn_prepare_command(tconn, sock); if (!p) return -EIO; - p->barrier = b->br_number; - /* inc_ap_pending was done where this was queued. - * dec_ap_pending will be done in got_BarrierAck - * or (on connection loss) in w_clear_epoch. 
*/ - return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0); + p->barrier = tconn->send.current_epoch_nr; + p->pad = 0; + tconn->send.current_epoch_writes = 0; + + return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0); } int w_send_write_hint(struct drbd_work *w, int cancel) @@ -1257,6 +1248,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; + struct drbd_tconn *tconn = mdev->tconn; int err; if (unlikely(cancel)) { @@ -1264,6 +1256,20 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) return 0; } + if (!tconn->send.seen_any_write_yet) { + tconn->send.seen_any_write_yet = true; + tconn->send.current_epoch_nr = req->epoch; + } + if (tconn->send.current_epoch_nr != req->epoch) { + if (tconn->send.current_epoch_writes) + drbd_send_barrier(tconn); + tconn->send.current_epoch_nr = req->epoch; + } + /* this time, no tconn->send.current_epoch_writes++; + * If it was sent, it was the closing barrier for the last + * replicated epoch, before we went into AHEAD mode. + * No more barriers will be sent, until we leave AHEAD mode again. 
*/ + err = drbd_send_out_of_sync(mdev, req); req_mod(req, OOS_HANDED_TO_NETWORK); @@ -1280,6 +1286,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; + struct drbd_tconn *tconn = mdev->tconn; int err; if (unlikely(cancel)) { @@ -1287,6 +1294,17 @@ int w_send_dblock(struct drbd_work *w, int cancel) return 0; } + if (!tconn->send.seen_any_write_yet) { + tconn->send.seen_any_write_yet = true; + tconn->send.current_epoch_nr = req->epoch; + } + if (tconn->send.current_epoch_nr != req->epoch) { + if (tconn->send.current_epoch_writes) + drbd_send_barrier(tconn); + tconn->send.current_epoch_nr = req->epoch; + } + tconn->send.current_epoch_writes++; + err = drbd_send_dblock(mdev, req); req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); @@ -1303,6 +1321,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_conf *mdev = w->mdev; + struct drbd_tconn *tconn = mdev->tconn; int err; if (unlikely(cancel)) { @@ -1310,6 +1329,15 @@ int w_send_read_req(struct drbd_work *w, int cancel) return 0; } + /* Even read requests may close a write epoch, + * if there was any yet. */ + if (tconn->send.seen_any_write_yet && + tconn->send.current_epoch_nr != req->epoch) { + if (tconn->send.current_epoch_writes) + drbd_send_barrier(tconn); + tconn->send.current_epoch_nr = req->epoch; + } + err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); @@ -1673,6 +1701,34 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) mutex_unlock(mdev->state_mutex); } +/* If the resource already closed the current epoch, but we did not + * (because we have not yet seen new requests), we should send the + * corresponding barrier now. Must be checked within the same spinlock + * that is used to check for new requests. 
*/ +bool need_to_send_barrier(struct drbd_tconn *connection) +{ + if (!connection->send.seen_any_write_yet) + return false; + + /* Skip barriers that do not contain any writes. + * This may happen during AHEAD mode. */ + if (!connection->send.current_epoch_writes) + return false; + + /* ->req_lock is held when requests are queued on + * connection->sender_work, and put into ->transfer_log. + * It is also held when ->current_tle_nr is increased. + * So either there are already new requests queued, + * and corresponding barriers will be sent there. + * Or nothing new is queued yet, so the difference will be 1. + */ + if (atomic_read(&connection->current_tle_nr) != + connection->send.current_epoch_nr + 1) + return false; + + return true; +} + bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) { spin_lock_irq(&queue->q_lock); @@ -1690,15 +1746,79 @@ bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_lis return !list_empty(work_list); } +void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list) +{ + DEFINE_WAIT(wait); + struct net_conf *nc; + int uncork, cork; + + dequeue_work_item(&connection->sender_work, work_list); + if (!list_empty(work_list)) + return; + + /* Still nothing to do? + * Maybe we still need to close the current epoch, + * even if no new requests are queued yet. + * + * Also, poke TCP, just in case. + * Then wait for new work (or signal). */ + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + uncork = nc ? nc->tcp_cork : 0; + rcu_read_unlock(); + if (uncork) { + mutex_lock(&connection->data.mutex); + if (connection->data.socket) + drbd_tcp_uncork(connection->data.socket); + mutex_unlock(&connection->data.mutex); + } + + for (;;) { + int send_barrier; + prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE); + spin_lock_irq(&connection->req_lock); + spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one?
*/ + list_splice_init(&connection->sender_work.q, work_list); + spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ + if (!list_empty(work_list) || signal_pending(current)) { + spin_unlock_irq(&connection->req_lock); + break; + } + send_barrier = need_to_send_barrier(connection); + spin_unlock_irq(&connection->req_lock); + if (send_barrier) { + drbd_send_barrier(connection); + connection->send.current_epoch_nr++; + } + schedule(); + /* may be woken up for other things but new work, too, + * e.g. if the current epoch got closed. + * In which case we send the barrier above. */ + } + finish_wait(&connection->sender_work.q_wait, &wait); + + /* someone may have changed the config while we have been waiting above. */ + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + cork = nc ? nc->tcp_cork : 0; + rcu_read_unlock(); + mutex_lock(&connection->data.mutex); + if (connection->data.socket) { + if (cork) + drbd_tcp_cork(connection->data.socket); + else if (!uncork) + drbd_tcp_uncork(connection->data.socket); + } + mutex_unlock(&connection->data.mutex); +} + int drbd_worker(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; struct drbd_conf *mdev; - struct net_conf *nc; LIST_HEAD(work_list); int vnr; - int cork; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); @@ -1706,29 +1826,7 @@ int drbd_worker(struct drbd_thread *thi) /* as long as we use drbd_queue_work_front(), * we may only dequeue single work items here, not batches. */ if (list_empty(&work_list)) - dequeue_work_item(&tconn->sender_work, &work_list); - - /* Still nothing to do? Poke TCP, just in case, - * then wait for new work (or signal). */ - if (list_empty(&work_list)) { - mutex_lock(&tconn->data.mutex); - rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); - cork = nc ? 
nc->tcp_cork : 0; - rcu_read_unlock(); - - if (tconn->data.socket && cork) - drbd_tcp_uncork(tconn->data.socket); - mutex_unlock(&tconn->data.mutex); - - wait_event_interruptible(tconn->sender_work.q_wait, - dequeue_work_item(&tconn->sender_work, &work_list)); - - mutex_lock(&tconn->data.mutex); - if (tconn->data.socket && cork) - drbd_tcp_cork(tconn->data.socket); - mutex_unlock(&tconn->data.mutex); - } + wait_for_work(tconn, &work_list); if (signal_pending(current)) { flush_signals(current); From 5da9c8364443797ece9393670fb7ab69cff055ed Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 29 Mar 2012 17:04:14 +0200 Subject: [PATCH 527/609] drbd: better separate WRITE and READ code paths in drbd_make_request cherry-picked and adapted from drbd 9 devel branch READs will be interesting to at most one connection, WRITEs should be interesting for all established connections. Introduce some helper functions to hopefully make this easier to follow. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 399 ++++++++++++++++++---------------- 1 file changed, 211 insertions(+), 188 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ca28b56b7a2..d2d61af034e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -304,15 +304,21 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) /* Update disk stats */ _drbd_end_io_acct(mdev, req); - /* if READ failed, + /* If READ failed, * have it be pushed back to the retry work queue, - * so it will re-enter __drbd_make_request, + * so it will re-enter __drbd_make_request(), * and be re-assigned to a suitable local or remote path, * or failed if we do not have access to good data anymore. - * READA may fail. + * + * Unless it was failed early by __drbd_make_request(), + * because no path was available, in which case + * it was not even added to the transfer_log. 
+ * + * READA may fail, and will not be retried. + * * WRITE should have used all available paths already. */ - if (!ok && rw == READ) + if (!ok && rw == READ && !list_empty(&req->tl_requests)) req->rq_state |= RQ_POSTPONED; if (!(req->rq_state & RQ_POSTPONED)) { @@ -725,19 +731,12 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } -static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector) +static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector, + enum drbd_read_balancing rbm) { - enum drbd_read_balancing rbm; struct backing_dev_info *bdi; int stripe_shift; - if (mdev->state.pdsk < D_UP_TO_DATE) - return false; - - rcu_read_lock(); - rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; - rcu_read_unlock(); - switch (rbm) { case RB_CONGESTED_REMOTE: bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; @@ -798,17 +797,160 @@ static void complete_conflicting_writes(struct drbd_request *req) finish_wait(&mdev->misc_wait, &wait); } +/* called within req_lock and rcu_read_lock() */ +static bool conn_check_congested(struct drbd_conf *mdev) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct net_conf *nc; + bool congested = false; + enum drbd_on_congestion on_congestion; + + nc = rcu_dereference(tconn->net_conf); + on_congestion = nc ? 
nc->on_congestion : OC_BLOCK; + if (on_congestion == OC_BLOCK || + tconn->agreed_pro_version < 96) + return false; + + if (nc->cong_fill && + atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) { + dev_info(DEV, "Congestion-fill threshold reached\n"); + congested = true; + } + + if (mdev->act_log->used >= nc->cong_extents) { + dev_info(DEV, "Congestion-extents threshold reached\n"); + congested = true; + } + + if (congested) { + if (mdev->tconn->current_tle_writes) + /* start a new epoch for non-mirrored writes */ + start_new_tl_epoch(mdev->tconn); + + if (on_congestion == OC_PULL_AHEAD) + _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); + else /*nc->on_congestion == OC_DISCONNECT */ + _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); + } + + return congested; +} + +/* If this returns false, and req->private_bio is still set, + * this should be submitted locally. + * + * If it returns false, but req->private_bio is not set, + * we do not have access to good data :( + * + * Otherwise, this destroys req->private_bio, if any, + * and returns true. + */ +static bool do_remote_read(struct drbd_request *req) +{ + struct drbd_conf *mdev = req->w.mdev; + enum drbd_read_balancing rbm; + + if (req->private_bio) { + if (!drbd_may_do_local_read(mdev, + req->i.sector, req->i.size)) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } + } + + if (mdev->state.pdsk != D_UP_TO_DATE) + return false; + + /* TODO: improve read balancing decisions, take into account drbd + * protocol, pending requests etc. 
*/ + + rcu_read_lock(); + rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; + rcu_read_unlock(); + + if (rbm == RB_PREFER_LOCAL && req->private_bio) + return false; /* submit locally */ + + if (req->private_bio == NULL) + return true; + + if (remote_due_to_read_balancing(mdev, req->i.sector, rbm)) { + if (req->private_bio) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } + return true; + } + + return false; +} + +/* returns number of connections (== 1, for drbd 8.4) + * expected to actually write this data, + * which does NOT include those that we are L_AHEAD for. */ +static int drbd_process_write_request(struct drbd_request *req) +{ + struct drbd_conf *mdev = req->w.mdev; + int remote, send_oos; + + rcu_read_lock(); + remote = drbd_should_do_remote(mdev->state); + if (remote) { + conn_check_congested(mdev); + remote = drbd_should_do_remote(mdev->state); + } + send_oos = drbd_should_send_out_of_sync(mdev->state); + rcu_read_unlock(); + + if (!remote && !send_oos) + return 0; + + D_ASSERT(!(remote && send_oos)); + + if (remote) { + _req_mod(req, TO_BE_SENT); + _req_mod(req, QUEUE_FOR_NET_WRITE); + } else if (drbd_set_out_of_sync(mdev, req->i.sector, req->i.size)) + _req_mod(req, QUEUE_FOR_SEND_OOS); + + return remote; +} + +static void +drbd_submit_req_private_bio(struct drbd_request *req) +{ + struct drbd_conf *mdev = req->w.mdev; + struct bio *bio = req->private_bio; + const int rw = bio_rw(bio); + + bio->bi_bdev = mdev->ldev->backing_bdev; + + /* State may have changed since we grabbed our reference on the + * ->ldev member. Double check, and short-circuit to endio. + * In case the last activity log transaction failed to get on + * stable storage, and this is a WRITE, we may not even submit + * this bio. */ + if (get_ldev(mdev)) { + if (drbd_insert_fault(mdev, + rw == WRITE ? DRBD_FAULT_DT_WR + : rw == READ ? 
DRBD_FAULT_DT_RD + : DRBD_FAULT_DT_RA)) + bio_endio(bio, -EIO); + else + generic_make_request(bio); + put_ldev(mdev); + } else + bio_endio(bio, -EIO); +} + int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); - const int size = bio->bi_size; - const sector_t sector = bio->bi_sector; + struct bio_and_error m = { NULL, }; struct drbd_request *req; - struct net_conf *nc; - int local, remote, send_oos = 0; - int err = 0; - int ret = 0; - union drbd_dev_state s; + bool no_remote = false; /* allocate outside of all locks; */ req = drbd_req_new(mdev, bio); @@ -822,70 +964,23 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s } req->start_time = start_time; - local = get_ldev(mdev); - if (!local) { - bio_put(req->private_bio); /* or we get a bio leak */ + if (!get_ldev(mdev)) { + bio_put(req->private_bio); req->private_bio = NULL; } - if (rw == WRITE) { - remote = 1; - } else { - /* READ || READA */ - if (local) { - if (!drbd_may_do_local_read(mdev, sector, size) || - remote_due_to_read_balancing(mdev, sector)) { - /* we could kick the syncer to - * sync this extent asap, wait for - * it, then continue locally. - * Or just issue the request remotely. - */ - local = 0; - bio_put(req->private_bio); - req->private_bio = NULL; - put_ldev(mdev); - } - } - remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; - } - - /* If we have a disk, but a READA request is mapped to remote, - * we are R_PRIMARY, D_INCONSISTENT, SyncTarget. - * Just fail that READA request right here. - * - * THINK: maybe fail all READA when not local? - * or make this configurable... - * if network is slow, READA won't do any good. - */ - if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) { - err = -EWOULDBLOCK; - goto fail_and_free_req; - } /* For WRITES going to the local disk, grab a reference on the target * extent. 
This waits for any resync activity in the corresponding * resync extent to finish, and, if necessary, pulls in the target * extent into the activity log, which involves further disk io because * of transactional on-disk meta data updates. */ - if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) { + if (rw == WRITE && req->private_bio + && !test_bit(AL_SUSPENDED, &mdev->flags)) { req->rq_state |= RQ_IN_ACT_LOG; drbd_al_begin_io(mdev, &req->i); } - s = mdev->state; - remote = remote && drbd_should_do_remote(s); - send_oos = rw == WRITE && drbd_should_send_out_of_sync(s); - D_ASSERT(!(remote && send_oos)); - - if (!(local || remote) && !drbd_suspended(mdev)) { - if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); - err = -EIO; - goto fail_free_complete; - } - - /* GOOD, everything prepared, grab the spin_lock */ spin_lock_irq(&mdev->tconn->req_lock); - if (rw == WRITE) { /* This may temporarily give up the req_lock, * but will re-aquire it before it returns here. @@ -893,53 +988,28 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s complete_conflicting_writes(req); } + /* no more giving up req_lock from now on! */ + if (drbd_suspended(mdev)) { - /* If we got suspended, use the retry mechanism in - drbd_make_request() to restart processing of this - bio. 
In the next call to drbd_make_request - we sleep in inc_ap_bio() */ - ret = 1; - spin_unlock_irq(&mdev->tconn->req_lock); - goto fail_free_complete; - } - - if (remote || send_oos) { - remote = drbd_should_do_remote(mdev->state); - send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state); - D_ASSERT(!(remote && send_oos)); - - if (!(remote || send_oos)) - dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); - if (!(local || remote)) { - dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); - spin_unlock_irq(&mdev->tconn->req_lock); - err = -EIO; - goto fail_free_complete; + /* push back and retry: */ + req->rq_state |= RQ_POSTPONED; + if (req->private_bio) { + bio_put(req->private_bio); + req->private_bio = NULL; } + goto out; } /* Update disk stats */ _drbd_start_io_acct(mdev, req, bio); - /* NOTE - * Actually, 'local' may be wrong here already, since we may have failed - * to write to the meta data, and may become wrong anytime because of - * local io-error for some other request, which would lead to us - * "detaching" the local disk. - * - * 'remote' may become wrong any time because the network could fail. - * - * This is a harmless race condition, though, since it is handled - * correctly at the appropriate places; so it just defers the failure - * of the respective operation. - */ - - /* mark them early for readability. - * this just sets some state flags. */ - if (remote) - _req_mod(req, TO_BE_SENT); - if (local) - _req_mod(req, TO_BE_SUBMITTED); + /* We fail READ/READA early, if we can not serve it. + * We must do this before req is registered on any lists. + * Otherwise, req_may_be_completed() will queue failed READ for retry. */ + if (rw != WRITE) { + if (!do_remote_read(req) && !req->private_bio) + goto nodata; + } /* which transfer log epoch does this belong to? 
*/ req->epoch = atomic_read(&mdev->tconn->current_tle_nr); @@ -948,90 +1018,43 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log); - /* NOTE remote first: to get the concurrent write detection right, - * we must register the request before start of local IO. */ - if (remote) { - /* either WRITE and C_CONNECTED, - * or READ, and no local disk, - * or READ, but not in sync. - */ - _req_mod(req, (rw == WRITE) - ? QUEUE_FOR_NET_WRITE - : QUEUE_FOR_NET_READ); + if (rw == WRITE) { + if (!drbd_process_write_request(req)) + no_remote = true; + } else { + /* We either have a private_bio, or we can read from remote. + * Otherwise we had done the goto nodata above. */ + if (req->private_bio == NULL) { + _req_mod(req, TO_BE_SENT); + _req_mod(req, QUEUE_FOR_NET_READ); + } else + no_remote = true; } - if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) - _req_mod(req, QUEUE_FOR_SEND_OOS); - rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); - if (remote && - nc->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) { - int congested = 0; - - if (nc->cong_fill && - atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) { - dev_info(DEV, "Congestion-fill threshold reached\n"); - congested = 1; - } - - if (mdev->act_log->used >= nc->cong_extents) { - dev_info(DEV, "Congestion-extents threshold reached\n"); - congested = 1; - } - - if (congested) { - if (mdev->tconn->current_tle_writes) - /* start a new epoch for non-mirrored writes */ - start_new_tl_epoch(mdev->tconn); - - if (nc->on_congestion == OC_PULL_AHEAD) - _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); - else /*nc->on_congestion == OC_DISCONNECT */ - _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); - } + if (req->private_bio) { + /* needs to be marked within the same spinlock */ + _req_mod(req, TO_BE_SUBMITTED); + /* but we need to give up the spinlock to submit */ + 
spin_unlock_irq(&mdev->tconn->req_lock); + drbd_submit_req_private_bio(req); + /* once we have submitted, we must no longer look at req, + * it may already be destroyed. */ + return 0; + } else if (no_remote) { +nodata: + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + /* A write may have been queued for send_oos, however. + * So we can not simply free it, we must go through req_may_be_completed() */ } - rcu_read_unlock(); +out: + req_may_be_completed(req, &m); spin_unlock_irq(&mdev->tconn->req_lock); - if (local) { - req->private_bio->bi_bdev = mdev->ldev->backing_bdev; - - /* State may have changed since we grabbed our reference on the - * mdev->ldev member. Double check, and short-circuit to endio. - * In case the last activity log transaction failed to get on - * stable storage, and this is a WRITE, we may not even submit - * this bio. */ - if (get_ldev(mdev)) { - if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR - : rw == READ ? DRBD_FAULT_DT_RD - : DRBD_FAULT_DT_RA)) - bio_endio(req->private_bio, -EIO); - else - generic_make_request(req->private_bio); - put_ldev(mdev); - } else - bio_endio(req->private_bio, -EIO); - } - + if (m.bio) + complete_master_bio(mdev, &m); return 0; - -fail_free_complete: - if (req->rq_state & RQ_IN_ACT_LOG) - drbd_al_complete_io(mdev, &req->i); -fail_and_free_req: - if (local) { - bio_put(req->private_bio); - req->private_bio = NULL; - put_ldev(mdev); - } - if (!ret) - bio_endio(bio, err); - - drbd_req_free(req); - dec_ap_bio(mdev); - - return ret; } int drbd_make_request(struct request_queue *q, struct bio *bio) From 5df69ece6e93cfd4e09b14bf32bd101df6cbde38 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 24 Jan 2012 16:49:58 +0100 Subject: [PATCH 528/609] drbd: __drbd_make_request() is now void The previous commit causes __drbd_make_request() to always return 0. Change it to void. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 5 ++--- drivers/block/drbd/drbd_req.c | 13 ++++++------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 309c121557a..8536fabbf98 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1403,7 +1403,7 @@ extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; /* drbd_req */ -extern int __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long); +extern void __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long); extern int drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8c6c48e363c..bedfeeccd51 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2236,9 +2236,8 @@ static void do_retry(struct work_struct *ws) /* We are not just doing generic_make_request(), * as we want to keep the start_time information. 
*/ - do { - inc_ap_bio(mdev); - } while(__drbd_make_request(mdev, bio, start_time)); + inc_ap_bio(mdev); + __drbd_make_request(mdev, bio, start_time); } } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d2d61af034e..6bac415358d 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -945,7 +945,7 @@ drbd_submit_req_private_bio(struct drbd_request *req) bio_endio(bio, -EIO); } -int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); struct bio_and_error m = { NULL, }; @@ -960,7 +960,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s * if user cannot handle io errors, that's not our business. */ dev_err(DEV, "could not kmalloc() req\n"); bio_endio(bio, -ENOMEM); - return 0; + return; } req->start_time = start_time; @@ -1039,7 +1039,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s drbd_submit_req_private_bio(req); /* once we have submitted, we must no longer look at req, * it may already be destroyed. 
*/ - return 0; + return; } else if (no_remote) { nodata: if (__ratelimit(&drbd_ratelimit_state)) @@ -1054,7 +1054,7 @@ out: if (m.bio) complete_master_bio(mdev, &m); - return 0; + return; } int drbd_make_request(struct request_queue *q, struct bio *bio) @@ -1070,9 +1070,8 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) D_ASSERT(bio->bi_size > 0); D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); - do { - inc_ap_bio(mdev); - } while (__drbd_make_request(mdev, bio, start_time)); + inc_ap_bio(mdev); + __drbd_make_request(mdev, bio, start_time); return 0; } From b406777e6496de346e8ee12fa64e1fe0adc02a78 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 24 Jan 2012 16:58:11 +0100 Subject: [PATCH 529/609] drbd: introduce completion_ref and kref to struct drbd_request cherry-picked and adapted from drbd 9 devel branch completion_ref will count pending events necessary for completion. kref is for destruction. This only introduces these new members of struct drbd_request, a followup patch will make actual use of them. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +++++ drivers/block/drbd/drbd_req.c | 33 +++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8536fabbf98..52ad1bfce85 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -577,6 +577,11 @@ struct drbd_request { struct bio *master_bio; /* master bio pointer */ unsigned long rq_state; /* see comments above _req_mod() */ unsigned long start_time; + + /* once it hits 0, we may complete the master_bio */ + atomic_t completion_ref; + /* once it hits 0, we may destroy this drbd_request object */ + struct kref kref; }; struct drbd_epoch { diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 6bac415358d..ae894af428c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -85,17 +85,15 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); + atomic_set(&req->completion_ref, 1); + kref_init(&req->kref); return req; } -static void drbd_req_free(struct drbd_request *req) -{ - mempool_free(req, drbd_request_mempool); -} - -/* rw is bio_data_dir(), only READ or WRITE */ -static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) +static void drbd_req_destroy(struct kref *kref) { + struct drbd_request *req = container_of(kref, struct drbd_request, kref); + struct drbd_conf *mdev = req->w.mdev; const unsigned long s = req->rq_state; /* remove it from the transfer log. @@ -109,7 +107,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const /* if it was a write, we may have to set the corresponding * bit(s) out-of-sync first. If it had a local part, we need to * release the reference to the activity log. 
*/ - if (rw == WRITE) { + if (s & RQ_WRITE) { /* Set out-of-sync unless both OK flags are set * (local only or remote failed). * Other places where we set out-of-sync: @@ -146,7 +144,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const if (s & RQ_POSTPONED) drbd_restart_request(req); else - drbd_req_free(req); + mempool_free(req, drbd_request_mempool); } static void wake_all_senders(struct drbd_tconn *tconn) { @@ -196,12 +194,10 @@ static void req_may_be_done(struct drbd_request *req) { const unsigned long s = req->rq_state; - struct drbd_conf *mdev = req->w.mdev; - int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* req->master_bio still present means: Not yet completed. * - * Unless this is RQ_POSTPONED, which will cause _req_is_done() to + * Unless this is RQ_POSTPONED, which will cause drbd_req_destroy() to * queue it on the retry workqueue instead of destroying it. */ if (req->master_bio && !(s & RQ_POSTPONED)) @@ -216,7 +212,7 @@ void req_may_be_done(struct drbd_request *req) /* this is disconnected (local only) operation, * or protocol A, B, or C P_BARRIER_ACK, * or killed from the transfer log due to connection loss. */ - _req_is_done(mdev, req, rw); + kref_put(&req->kref, drbd_req_destroy); } /* else: network part and not DONE yet. that is * protocol A, B, or C, barrier ack still pending... */ @@ -250,6 +246,15 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) if (s & RQ_NET_PENDING) return; + /* FIXME + * instead of all the RQ_FLAGS, actually use the completion_ref + * to decide if this is ready to be completed. 
*/ + if (req->master_bio) { + int complete = atomic_dec_and_test(&req->completion_ref); + D_ASSERT(complete != 0); + } else + D_ASSERT(atomic_read(&req->completion_ref) == 0); + if (req->master_bio) { int rw = bio_rw(req->master_bio); @@ -1113,7 +1118,7 @@ struct drbd_request *find_oldest_request(struct drbd_tconn *tconn) * and find the oldest not yet completed request */ struct drbd_request *r; list_for_each_entry(r, &tconn->transfer_log, tl_requests) { - if (r->rq_state & (RQ_NET_PENDING|RQ_LOCAL_PENDING)) + if (atomic_read(&r->completion_ref)) return r; } return NULL; From a0d856dfaed16efb9600b2a7d147cb6dbc11ff94 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 24 Jan 2012 17:19:42 +0100 Subject: [PATCH 530/609] drbd: base completion and destruction of requests on ref counts cherry-picked and adapted from drbd 9 devel branch The logic for when to get or put a reference is in mod_rq_state(). To not get confused in the freeze/thaw respectively resend/restart paths, or when cleaning up requests waiting for P_BARRIER_ACK, this also introduces additional state flags: RQ_COMPLETION_SUSP, and RQ_EXP_BARR_ACK. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_req.c | 532 +++++++++++++++++---------------- drivers/block/drbd/drbd_req.h | 9 + 4 files changed, 284 insertions(+), 262 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 52ad1bfce85..8b26a2c954d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -575,13 +575,14 @@ struct drbd_request { struct list_head tl_requests; /* ring list in the transfer log */ struct bio *master_bio; /* master bio pointer */ - unsigned long rq_state; /* see comments above _req_mod() */ unsigned long start_time; /* once it hits 0, we may complete the master_bio */ atomic_t completion_ref; /* once it hits 0, we may destroy this drbd_request object */ struct kref kref; + + unsigned rq_state; /* see comments above _req_mod() */ }; struct drbd_epoch { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bedfeeccd51..d07cb31a36e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -210,7 +210,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, /* find latest not yet barrier-acked write request, * count writes in its epoch. 
*/ list_for_each_entry(r, &tconn->transfer_log, tl_requests) { - const unsigned long s = r->rq_state; + const unsigned s = r->rq_state; if (!req) { if (!(s & RQ_WRITE)) continue; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ae894af428c..329528d9dec 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -85,7 +85,9 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); + /* one reference to be put by __drbd_make_request */ atomic_set(&req->completion_ref, 1); + /* one kref as long as completion_ref > 0 */ kref_init(&req->kref); return req; } @@ -94,7 +96,16 @@ static void drbd_req_destroy(struct kref *kref) { struct drbd_request *req = container_of(kref, struct drbd_request, kref); struct drbd_conf *mdev = req->w.mdev; - const unsigned long s = req->rq_state; + const unsigned s = req->rq_state; + + if ((req->master_bio && !(s & RQ_POSTPONED)) || + atomic_read(&req->completion_ref) || + (s & RQ_LOCAL_PENDING) || + ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) { + dev_err(DEV, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n", + s, atomic_read(&req->completion_ref)); + return; + } /* remove it from the transfer log. * well, only if it had been there in the first @@ -180,44 +191,6 @@ static void drbd_remove_request_interval(struct rb_root *root, wake_up(&mdev->misc_wait); } -static void maybe_wakeup_conflicting_requests(struct drbd_request *req) -{ - const unsigned long s = req->rq_state; - if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) - return; - if (req->i.waiting) - /* Retry all conflicting peer requests. */ - wake_up(&req->w.mdev->misc_wait); -} - -static -void req_may_be_done(struct drbd_request *req) -{ - const unsigned long s = req->rq_state; - - /* req->master_bio still present means: Not yet completed. 
- * - * Unless this is RQ_POSTPONED, which will cause drbd_req_destroy() to - * queue it on the retry workqueue instead of destroying it. - */ - if (req->master_bio && !(s & RQ_POSTPONED)) - return; - - /* Local still pending, even though master_bio is already completed? - * may happen for RQ_LOCAL_ABORTED requests. */ - if (s & RQ_LOCAL_PENDING) - return; - - if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { - /* this is disconnected (local only) operation, - * or protocol A, B, or C P_BARRIER_ACK, - * or killed from the transfer log due to connection loss. */ - kref_put(&req->kref, drbd_req_destroy); - } - /* else: network part and not DONE yet. that is - * protocol A, B, or C, barrier ack still pending... */ -} - /* Helper for __req_mod(). * Set m->bio to the master bio, if it is fit to be completed, * or leave it alone (it is initialized to NULL in __req_mod), @@ -225,10 +198,12 @@ void req_may_be_done(struct drbd_request *req) * If m->bio is set, the error status to be returned is placed in m->error. */ static -void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) +void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) { - const unsigned long s = req->rq_state; + const unsigned s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; + int rw; + int error, ok; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -239,116 +214,208 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. 
*/ - if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) + if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) || + (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) || + (s & RQ_COMPLETION_SUSP)) { + dev_err(DEV, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s); return; - if (s & RQ_NET_QUEUED) - return; - if (s & RQ_NET_PENDING) - return; - - /* FIXME - * instead of all the RQ_FLAGS, actually use the completion_ref - * to decide if this is ready to be completed. */ - if (req->master_bio) { - int complete = atomic_dec_and_test(&req->completion_ref); - D_ASSERT(complete != 0); - } else - D_ASSERT(atomic_read(&req->completion_ref) == 0); - - if (req->master_bio) { - int rw = bio_rw(req->master_bio); - - /* this is DATA_RECEIVED (remote read) - * or protocol C P_WRITE_ACK - * or protocol B P_RECV_ACK - * or protocol A "HANDED_OVER_TO_NETWORK" (SendAck) - * or canceled or failed, - * or killed from the transfer log due to connection loss. - */ - - /* - * figure out whether to report success or failure. - * - * report success when at least one of the operations succeeded. - * or, to put the other way, - * only report failure, when both operations failed. - * - * what to do about the failures is handled elsewhere. - * what we need to do here is just: complete the master_bio. - * - * local completion error, if any, has been stored as ERR_PTR - * in private_bio within drbd_request_endio. - */ - int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); - int error = PTR_ERR(req->private_bio); - - /* remove the request from the conflict detection - * respective block_id verification hash */ - if (!drbd_interval_empty(&req->i)) { - struct rb_root *root; - - if (rw == WRITE) - root = &mdev->write_requests; - else - root = &mdev->read_requests; - drbd_remove_request_interval(root, req); - } else if (!(s & RQ_POSTPONED)) - D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); - - /* Before we can signal completion to the upper layers, - * we may need to close the current transfer log epoch. 
- * We are within the request lock, so we can simply compare - * the request epoch number with the current transfer log - * epoch number. If they match, increase the current_tle_nr, - * and reset the transfer log epoch write_cnt. - */ - if (rw == WRITE && - req->epoch == atomic_read(&mdev->tconn->current_tle_nr)) - start_new_tl_epoch(mdev->tconn); - - /* Update disk stats */ - _drbd_end_io_acct(mdev, req); - - /* If READ failed, - * have it be pushed back to the retry work queue, - * so it will re-enter __drbd_make_request(), - * and be re-assigned to a suitable local or remote path, - * or failed if we do not have access to good data anymore. - * - * Unless it was failed early by __drbd_make_request(), - * because no path was available, in which case - * it was not even added to the transfer_log. - * - * READA may fail, and will not be retried. - * - * WRITE should have used all available paths already. - */ - if (!ok && rw == READ && !list_empty(&req->tl_requests)) - req->rq_state |= RQ_POSTPONED; - - if (!(req->rq_state & RQ_POSTPONED)) { - m->error = ok ? 0 : (error ?: -EIO); - m->bio = req->master_bio; - req->master_bio = NULL; - } else { - /* Assert that this will be _req_is_done() - * with this very invokation. */ - /* FIXME: - * what about (RQ_LOCAL_PENDING | RQ_LOCAL_ABORTED)? - */ - D_ASSERT(!(s & RQ_LOCAL_PENDING)); - D_ASSERT((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)); - } } - req_may_be_done(req); + + if (!req->master_bio) { + dev_err(DEV, "drbd_req_complete: Logic BUG, master_bio == NULL!\n"); + return; + } + + rw = bio_rw(req->master_bio); + + /* + * figure out whether to report success or failure. + * + * report success when at least one of the operations succeeded. + * or, to put the other way, + * only report failure, when both operations failed. + * + * what to do about the failures is handled elsewhere. + * what we need to do here is just: complete the master_bio. 
+ * + * local completion error, if any, has been stored as ERR_PTR + * in private_bio within drbd_request_endio. + */ + ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); + error = PTR_ERR(req->private_bio); + + /* remove the request from the conflict detection + * respective block_id verification hash */ + if (!drbd_interval_empty(&req->i)) { + struct rb_root *root; + + if (rw == WRITE) + root = &mdev->write_requests; + else + root = &mdev->read_requests; + drbd_remove_request_interval(root, req); + } else if (!(s & RQ_POSTPONED)) + D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); + + /* Before we can signal completion to the upper layers, + * we may need to close the current transfer log epoch. + * We are within the request lock, so we can simply compare + * the request epoch number with the current transfer log + * epoch number. If they match, increase the current_tle_nr, + * and reset the transfer log epoch write_cnt. + */ + if (rw == WRITE && + req->epoch == atomic_read(&mdev->tconn->current_tle_nr)) + start_new_tl_epoch(mdev->tconn); + + /* Update disk stats */ + _drbd_end_io_acct(mdev, req); + + /* If READ failed, + * have it be pushed back to the retry work queue, + * so it will re-enter __drbd_make_request(), + * and be re-assigned to a suitable local or remote path, + * or failed if we do not have access to good data anymore. + * + * Unless it was failed early by __drbd_make_request(), + * because no path was available, in which case + * it was not even added to the transfer_log. + * + * READA may fail, and will not be retried. + * + * WRITE should have used all available paths already. + */ + if (!ok && rw == READ && !list_empty(&req->tl_requests)) + req->rq_state |= RQ_POSTPONED; + + if (!(req->rq_state & RQ_POSTPONED)) { + m->error = ok ? 0 : (error ?: -EIO); + m->bio = req->master_bio; + req->master_bio = NULL; + } else { + /* Assert that this will be drbd_req_destroy()ed + * with this very invokation. 
*/ + D_ASSERT(atomic_read(&req->kref.refcount) == 1); + } } -static void req_may_be_completed_not_susp(struct drbd_request *req, struct bio_and_error *m) +static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) { struct drbd_conf *mdev = req->w.mdev; + D_ASSERT(m || (req->rq_state & RQ_POSTPONED)); - if (!drbd_suspended(mdev)) - req_may_be_completed(req, m); + if (!atomic_sub_and_test(put, &req->completion_ref)) + return 0; + + if (drbd_suspended(mdev)) { + /* We do not allow completion while suspended. Re-get a + * reference, so whatever happens when this is resumed + * may put and complete. */ + + D_ASSERT(!(req->rq_state & RQ_COMPLETION_SUSP)); + req->rq_state |= RQ_COMPLETION_SUSP; + atomic_inc(&req->completion_ref); + return 0; + } + + /* else */ + drbd_req_complete(req, m); + return 1; +} + +/* I'd like this to be the only place that manipulates + * req->completion_ref and req->kref. */ +static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, + int clear, int set) +{ + struct drbd_conf *mdev = req->w.mdev; + unsigned s = req->rq_state; + int c_put = 0; + int k_put = 0; + + /* apply */ + + req->rq_state &= ~clear; + req->rq_state |= set; + + /* no change? 
*/ + if (req->rq_state == s) + return; + + /* intent: get references */ + + if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING)) + atomic_inc(&req->completion_ref); + + if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) { + inc_ap_pending(mdev); + atomic_inc(&req->completion_ref); + } + + if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) + atomic_inc(&req->completion_ref); + + if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK)) + kref_get(&req->kref); /* wait for the DONE */ + + if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) + atomic_add(req->i.size >> 9, &mdev->ap_in_flight); + + /* progress: put references */ + + if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP)) + ++c_put; + + if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) { + D_ASSERT(req->rq_state & RQ_LOCAL_PENDING); + /* local completion may still come in later, + * we need to keep the req object around. */ + kref_get(&req->kref); + ++c_put; + } + + if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) { + if (req->rq_state & RQ_LOCAL_ABORTED) + ++k_put; + else + ++c_put; + } + + if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) { + dec_ap_pending(mdev); + ++c_put; + } + + if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) + ++c_put; + + if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { + if (req->rq_state & RQ_NET_SENT) + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + ++k_put; + } + + /* potentially complete and destroy */ + + if (k_put || c_put) { + /* Completion does it's own kref_put. If we are going to + * kref_sub below, we need req to be still around then. */ + int at_least = k_put + !!c_put; + int refcount = atomic_read(&req->kref.refcount); + if (refcount < at_least) + dev_err(DEV, + "mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n", + s, req->rq_state, refcount, at_least); + } + + /* If we made progress, retry conflicting peer requests, if any. 
*/ + if (req->i.waiting) + wake_up(&mdev->misc_wait); + + if (c_put) + k_put += drbd_req_put_completion_ref(req, m, c_put); + if (k_put) + kref_sub(&req->kref, k_put, drbd_req_destroy); } /* obviously this could be coded as many single functions @@ -388,7 +455,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* reached via __drbd_make_request * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); - req->rq_state |= RQ_NET_PENDING; rcu_read_lock(); nc = rcu_dereference(mdev->tconn->net_conf); p = nc->wire_protocol; @@ -396,13 +462,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK : p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0; - inc_ap_pending(mdev); + mod_rq_state(req, m, 0, RQ_NET_PENDING); break; case TO_BE_SUBMITTED: /* locally */ /* reached via __drbd_make_request */ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); - req->rq_state |= RQ_LOCAL_PENDING; + mod_rq_state(req, m, 0, RQ_LOCAL_PENDING); break; case COMPLETED_OK: @@ -411,44 +477,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, else mdev->read_cnt += req->i.size >> 9; - req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); - req->rq_state &= ~RQ_LOCAL_PENDING; - - maybe_wakeup_conflicting_requests(req); - req_may_be_completed_not_susp(req, m); + mod_rq_state(req, m, RQ_LOCAL_PENDING, + RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); break; case ABORT_DISK_IO: - req->rq_state |= RQ_LOCAL_ABORTED; - req_may_be_completed_not_susp(req, m); - break; - - case WRITE_COMPLETED_WITH_ERROR: - req->rq_state |= RQ_LOCAL_COMPLETED; - req->rq_state &= ~RQ_LOCAL_PENDING; - - __drbd_chk_io_error(mdev, false); - maybe_wakeup_conflicting_requests(req); - req_may_be_completed_not_susp(req, m); - break; - - case READ_AHEAD_COMPLETED_WITH_ERROR: - /* it is legal to fail READA */ - req->rq_state |= RQ_LOCAL_COMPLETED; - req->rq_state &= ~RQ_LOCAL_PENDING; - req_may_be_completed_not_susp(req, m); + 
mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED); break; case READ_COMPLETED_WITH_ERROR: drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); - - req->rq_state |= RQ_LOCAL_COMPLETED; - req->rq_state &= ~RQ_LOCAL_PENDING; - - D_ASSERT(!(req->rq_state & RQ_NET_MASK)); - + /* fall through. */ + case WRITE_COMPLETED_WITH_ERROR: __drbd_chk_io_error(mdev, false); - req_may_be_completed_not_susp(req, m); + /* fall through. */ + case READ_AHEAD_COMPLETED_WITH_ERROR: + /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ + mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); break; case QUEUE_FOR_NET_READ: @@ -461,7 +506,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* So we can verify the handle in the answer packet. * Corresponding drbd_remove_request_interval is in - * req_may_be_completed() */ + * drbd_req_complete() */ D_ASSERT(drbd_interval_empty(&req->i)); drbd_insert_interval(&mdev->read_requests, &req->i); @@ -469,7 +514,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); D_ASSERT((req->rq_state & RQ_LOCAL_MASK) == 0); - req->rq_state |= RQ_NET_QUEUED; + mod_rq_state(req, m, 0, RQ_NET_QUEUED); req->w.cb = w_send_read_req; drbd_queue_work(&mdev->tconn->sender_work, &req->w); break; @@ -479,7 +524,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* from __drbd_make_request only */ /* Corresponding drbd_remove_request_interval is in - * req_may_be_completed() */ + * drbd_req_complete() */ D_ASSERT(drbd_interval_empty(&req->i)); drbd_insert_interval(&mdev->write_requests, &req->i); @@ -504,7 +549,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* queue work item to send data */ D_ASSERT(req->rq_state & RQ_NET_PENDING); - req->rq_state |= RQ_NET_QUEUED; + mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK); req->w.cb = w_send_dblock; drbd_queue_work(&mdev->tconn->sender_work, &req->w); @@ -519,7 +564,7 @@ int 
__req_mod(struct drbd_request *req, enum drbd_req_event what, break; case QUEUE_FOR_SEND_OOS: - req->rq_state |= RQ_NET_QUEUED; + mod_rq_state(req, m, 0, RQ_NET_QUEUED); req->w.cb = w_send_out_of_sync; drbd_queue_work(&mdev->tconn->sender_work, &req->w); break; @@ -529,64 +574,43 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case SEND_FAILED: /* real cleanup will be done from tl_clear. just update flags * so it is no longer marked as on the worker queue */ - req->rq_state &= ~RQ_NET_QUEUED; - /* if we did it right, tl_clear should be scheduled only after - * this, so this should not be necessary! */ - req_may_be_completed_not_susp(req, m); + mod_rq_state(req, m, RQ_NET_QUEUED, 0); break; case HANDED_OVER_TO_NETWORK: /* assert something? */ - if (bio_data_dir(req->master_bio) == WRITE) - atomic_add(req->i.size >> 9, &mdev->ap_in_flight); - if (bio_data_dir(req->master_bio) == WRITE && !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) { /* this is what is dangerous about protocol A: * pretend it was successfully written on the peer. */ - if (req->rq_state & RQ_NET_PENDING) { - dec_ap_pending(mdev); - req->rq_state &= ~RQ_NET_PENDING; - req->rq_state |= RQ_NET_OK; - } /* else: neg-ack was faster... */ + if (req->rq_state & RQ_NET_PENDING) + mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); + /* else: neg-ack was faster... */ /* it is still not yet RQ_NET_DONE until the * corresponding epoch barrier got acked as well, * so we know what to dirty on connection loss */ } - req->rq_state &= ~RQ_NET_QUEUED; - req->rq_state |= RQ_NET_SENT; - req_may_be_completed_not_susp(req, m); + mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT); break; case OOS_HANDED_TO_NETWORK: /* Was not set PENDING, no longer QUEUED, so is now DONE * as far as this connection is concerned. 
*/ - req->rq_state &= ~RQ_NET_QUEUED; - req->rq_state |= RQ_NET_DONE; - req_may_be_completed_not_susp(req, m); + mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE); break; case CONNECTION_LOST_WHILE_PENDING: /* transfer log cleanup after connection loss */ - /* assert something? */ - if (req->rq_state & RQ_NET_PENDING) - dec_ap_pending(mdev); - - p = !(req->rq_state & RQ_WRITE) && req->rq_state & RQ_NET_PENDING; - - req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); - req->rq_state |= RQ_NET_DONE; - if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) - atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); - - req_may_be_completed(req, m); /* Allowed while state.susp */ + mod_rq_state(req, m, + RQ_NET_OK|RQ_NET_PENDING|RQ_COMPLETION_SUSP, + RQ_NET_DONE); break; case DISCARD_WRITE: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered by the activity log. */ - req->rq_state |= RQ_NET_DONE; + mod_rq_state(req, NULL, 0, RQ_NET_DONE); /* fall through */ case WRITE_ACKED_BY_PEER_AND_SIS: case WRITE_ACKED_BY_PEER: @@ -605,13 +629,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * see also notes above in HANDED_OVER_TO_NETWORK about * protocol != C */ ack_common: - req->rq_state |= RQ_NET_OK; D_ASSERT(req->rq_state & RQ_NET_PENDING); - dec_ap_pending(mdev); - atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); - req->rq_state &= ~RQ_NET_PENDING; - maybe_wakeup_conflicting_requests(req); - req_may_be_completed_not_susp(req, m); + mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); break; case POSTPONE_WRITE: @@ -622,64 +641,61 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, */ D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_POSTPONED; - maybe_wakeup_conflicting_requests(req); - req_may_be_completed_not_susp(req, m); + if (req->i.waiting) + wake_up(&mdev->misc_wait); + /* Do not clear RQ_NET_PENDING. 
This request will make further + * progress via restart_conflicting_writes() or + * fail_postponed_requests(). Hopefully. */ break; case NEG_ACKED: - /* assert something? */ - if (req->rq_state & RQ_NET_PENDING) { - dec_ap_pending(mdev); - if (req->rq_state & RQ_WRITE) - atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); - } - req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); - - req->rq_state |= RQ_NET_DONE; - - maybe_wakeup_conflicting_requests(req); - req_may_be_completed_not_susp(req, m); - /* else: done by HANDED_OVER_TO_NETWORK */ + mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, RQ_NET_DONE); break; case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; - - req_may_be_completed(req, m); /* Allowed while state.susp */ + mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0); break; case RESTART_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; - req->rq_state &= ~RQ_LOCAL_COMPLETED; + mod_rq_state(req, m, + RQ_COMPLETION_SUSP|RQ_LOCAL_COMPLETED, + RQ_LOCAL_PENDING); rv = MR_READ; if (bio_data_dir(req->master_bio) == WRITE) rv = MR_WRITE; - get_ldev(mdev); + get_ldev(mdev); /* always succeeds in this call path */ req->w.cb = w_restart_disk_io; drbd_queue_work(&mdev->tconn->sender_work, &req->w); break; case RESEND: /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK - before the connection loss (B&C only); only P_BARRIER_ACK was missing. + before the connection loss (B&C only); only P_BARRIER_ACK + (or the local completion?) was missing when we suspended. Throwing them out of the TL here by pretending we got a BARRIER_ACK. During connection handshake, we ensure that the peer was not rebooted. */ if (!(req->rq_state & RQ_NET_OK)) { + /* FIXME could this possibly be a req->w.cb == w_send_out_of_sync? + * in that case we must not set RQ_NET_PENDING. 
*/ + + mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING); if (req->w.cb) { - /* w.cb expected to be w_send_dblock, or w_send_read_req */ drbd_queue_work(&mdev->tconn->sender_work, &req->w); rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; - } + } /* else: FIXME can this happen? */ break; } /* else, fall through to BARRIER_ACKED */ case BARRIER_ACKED: + /* barrier ack for READ requests does not make sense */ if (!(req->rq_state & RQ_WRITE)) break; @@ -689,20 +705,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * we won't be able to clean them up... */ dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); } - if ((req->rq_state & RQ_NET_MASK) != 0) { - req->rq_state |= RQ_NET_DONE; - if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) - atomic_sub(req->i.size>>9, &mdev->ap_in_flight); - } - req_may_be_done(req); /* Allowed while state.susp */ + /* Allowed to complete requests, even while suspended. + * As this is called for all requests within a matching epoch, + * we need to filter, and only set RQ_NET_DONE for those that + * have actually been on the wire. */ + mod_rq_state(req, m, RQ_COMPLETION_SUSP, + (req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0); break; case DATA_RECEIVED: D_ASSERT(req->rq_state & RQ_NET_PENDING); - dec_ap_pending(mdev); - req->rq_state &= ~RQ_NET_PENDING; - req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); - req_may_be_completed_not_susp(req, m); + mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE); break; }; @@ -867,6 +880,9 @@ static bool do_remote_read(struct drbd_request *req) if (mdev->state.pdsk != D_UP_TO_DATE) return false; + if (req->private_bio == NULL) + return true; + /* TODO: improve read balancing decisions, take into account drbd * protocol, pending requests etc. 
*/ @@ -877,9 +893,6 @@ static bool do_remote_read(struct drbd_request *req) if (rbm == RB_PREFER_LOCAL && req->private_bio) return false; /* submit locally */ - if (req->private_bio == NULL) - return true; - if (remote_due_to_read_balancing(mdev, req->i.sector, rbm)) { if (req->private_bio) { bio_put(req->private_bio); @@ -1010,7 +1023,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long /* We fail READ/READA early, if we can not serve it. * We must do this before req is registered on any lists. - * Otherwise, req_may_be_completed() will queue failed READ for retry. */ + * Otherwise, drbd_req_complete() will queue failed READ for retry. */ if (rw != WRITE) { if (!do_remote_read(req) && !req->private_bio) goto nodata; @@ -1042,19 +1055,18 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long /* but we need to give up the spinlock to submit */ spin_unlock_irq(&mdev->tconn->req_lock); drbd_submit_req_private_bio(req); - /* once we have submitted, we must no longer look at req, - * it may already be destroyed. */ - return; + spin_lock_irq(&mdev->tconn->req_lock); } else if (no_remote) { nodata: if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); /* A write may have been queued for send_oos, however. 
- * So we can not simply free it, we must go through req_may_be_completed() */ + * So we can not simply free it, we must go through drbd_req_put_completion_ref() */ } out: - req_may_be_completed(req, &m); + if (drbd_req_put_completion_ref(req, &m, 1)) + kref_put(&req->kref, drbd_req_destroy); spin_unlock_irq(&mdev->tconn->req_lock); if (m.bio) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index f80af27fa5e..90e5a1eea72 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -203,11 +203,18 @@ enum drbd_req_state_bits { /* The peer has sent a retry ACK */ __RQ_POSTPONED, + /* would have been completed, + * but was not, because of drbd_suspended() */ + __RQ_COMPLETION_SUSP, + /* We expect a receive ACK (wire proto B) */ __RQ_EXP_RECEIVE_ACK, /* We expect a write ACK (wite proto C) */ __RQ_EXP_WRITE_ACK, + + /* waiting for a barrier ack, did an extra kref_get */ + __RQ_EXP_BARR_ACK, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -230,8 +237,10 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) #define RQ_POSTPONED (1UL << __RQ_POSTPONED) +#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP) #define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK) #define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK) +#define RQ_EXP_BARR_ACK (1UL << __RQ_EXP_BARR_ACK) /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ From 934722a2dbf87b43d39c787441e511157d5add94 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 24 Jul 2012 09:31:18 +0200 Subject: [PATCH 531/609] drbd: __req_mod: make DISCARD_WRITE an independent case cherry-picked and adapted from drbd 9 devel branch This looks cleaner to me, and also gets rid of the other ugly if-inside-case-fall-through.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 329528d9dec..f2ba43e7837 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -609,13 +609,19 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case DISCARD_WRITE: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential - * node crashes are covered by the activity log. */ - mod_rq_state(req, NULL, 0, RQ_NET_DONE); - /* fall through */ + * node crashes are covered by the activity log. + * + * If this request had been marked as RQ_POSTPONED before, + * it will actually not be discarded, but "restarted", + * resubmitted from the retry worker context. */ + D_ASSERT(req->rq_state & RQ_NET_PENDING); + D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); + mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK); + break; + case WRITE_ACKED_BY_PEER_AND_SIS: + req->rq_state |= RQ_NET_SIS; case WRITE_ACKED_BY_PEER: - if (what == WRITE_ACKED_BY_PEER_AND_SIS) - req->rq_state |= RQ_NET_SIS; D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); /* protocol C; successfully written on peer. * Nothing more to do here. From 9a278a7906066a1b4f37fff9b5e27a92af0ca3ce Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 24 Jul 2012 10:12:36 +0200 Subject: [PATCH 532/609] drbd: allow read requests to be retried after force-detach Sometimes, a lower level block device turns into a tar-pit, not completing requests at all, not even doing error completion. We can force-detach from such a tar-pit block device, either by disk-timeout, or by drbdadm detach --force. 
Queueing for retry only from the request destruction path (kref hit 0) makes it impossible to retry affected read requests from the peer, until the local IO completion happened, as the locally submitted bio holds a reference on the drbd request object. If we can only complete READs when the local completion finally happens, we would not need to force-detach in the first place. Instead, queue for retry where we otherwise had done the error completion. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 21 +++++++++++++++++---- drivers/block/drbd/drbd_req.c | 19 ++++++++++--------- drivers/block/drbd/drbd_req.h | 1 + 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d07cb31a36e..c0acd86c841 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2216,12 +2216,25 @@ static void do_retry(struct work_struct *ws) struct drbd_conf *mdev = req->w.mdev; struct bio *bio = req->master_bio; unsigned long start_time = req->start_time; + bool expected; - /* We have exclusive access to this request object. - * If it had not been RQ_POSTPONED, the code path which queued - * it here would have completed and freed it already. + expected = + expect(atomic_read(&req->completion_ref) == 0) && + expect(req->rq_state & RQ_POSTPONED) && + expect((req->rq_state & RQ_LOCAL_PENDING) == 0 || + (req->rq_state & RQ_LOCAL_ABORTED) != 0); + + if (!expected) + dev_err(DEV, "req=%p completion_ref=%d rq_state=%x\n", + req, atomic_read(&req->completion_ref), + req->rq_state); + + /* We still need to put one kref associated with the + * "completion_ref" going zero in the code path that queued it + * here. The request object may still be referenced by a + * frozen local req->private_bio, in case we force-detached. 
*/ - mempool_free(req, drbd_request_mempool); + kref_put(&req->kref, drbd_req_destroy); /* A single suspended or otherwise blocking device may stall * all others as well. Fortunately, this code path is to diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f2ba43e7837..c45479aaff8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -92,7 +92,7 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, return req; } -static void drbd_req_destroy(struct kref *kref) +void drbd_req_destroy(struct kref *kref) { struct drbd_request *req = container_of(kref, struct drbd_request, kref); struct drbd_conf *mdev = req->w.mdev; @@ -152,10 +152,7 @@ static void drbd_req_destroy(struct kref *kref) } } - if (s & RQ_POSTPONED) - drbd_restart_request(req); - else - mempool_free(req, drbd_request_mempool); + mempool_free(req, drbd_request_mempool); } static void wake_all_senders(struct drbd_tconn *tconn) { @@ -292,10 +289,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) m->error = ok ? 0 : (error ?: -EIO); m->bio = req->master_bio; req->master_bio = NULL; - } else { - /* Assert that this will be drbd_req_destroy()ed - * with this very invokation. 
*/ - D_ASSERT(atomic_read(&req->kref.refcount) == 1); } } @@ -320,6 +313,14 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_ /* else */ drbd_req_complete(req, m); + + if (req->rq_state & RQ_POSTPONED) { + /* don't destroy the req object just yet, + * but queue it for retry */ + drbd_restart_request(req); + return 0; + } + return 1; } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 90e5a1eea72..9611713c358 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -267,6 +267,7 @@ struct bio_and_error { int error; }; +extern void drbd_req_destroy(struct kref *kref); extern void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m); extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, From 27012382bc221a8cf6ca67ced19df699b0ba8e78 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 24 Jul 2012 10:13:55 +0200 Subject: [PATCH 533/609] drbd: take error path in drbd_adm_down if interrupted by signal drbd_adm_down() does adm_detach(), which can fail with various error codes, or be interrupted by a signal. The interrupted by signal case was not properly handled, leading to block drbd0: ASSERT( mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE ) in drbd/drbd_worker.c and further to destroying objects while still in use, and resulting crashes. Detect the interruption, and take the error path out. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bbc5c2f4a9b..323293e8887 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3188,7 +3188,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { retcode = adm_detach(mdev, 0); - if (retcode < SS_SUCCESS) { + if (retcode < SS_SUCCESS || retcode > NO_ERROR) { drbd_msg_put_info("failed to detach"); goto out; } From 3b9ef85e05f123f535d4ee1c5041b80f80648e50 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:06:26 +0200 Subject: [PATCH 534/609] drbd: fix null pointer dereference with on-congestion policy when diskless We must not look at mdev->act_log, unless we have a get_ldev() reference. It also does not make much sense to try to disconnect or pull-ahead of the peer, if we don't have good local data. Only consider congestion policies at all if our local disk is D_UP_TO_DATE. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c45479aaff8..891c3d41a27 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -823,7 +823,7 @@ static void complete_conflicting_writes(struct drbd_request *req) } /* called within req_lock and rcu_read_lock() */ -static bool conn_check_congested(struct drbd_conf *mdev) +static void maybe_pull_ahead(struct drbd_conf *mdev) { struct drbd_tconn *tconn = mdev->tconn; struct net_conf *nc; @@ -834,7 +834,14 @@ static bool conn_check_congested(struct drbd_conf *mdev) on_congestion = nc ?
nc->on_congestion : OC_BLOCK; if (on_congestion == OC_BLOCK || tconn->agreed_pro_version < 96) - return false; + return; + + /* If I don't even have good local storage, we can not reasonably try + * to pull ahead of the peer. We also need the local reference to make + * sure mdev->act_log is there. + */ + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) + return; if (nc->cong_fill && atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) { @@ -857,8 +864,7 @@ static bool conn_check_congested(struct drbd_conf *mdev) else /*nc->on_congestion == OC_DISCONNECT */ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); } - - return congested; + put_ldev(mdev); } /* If this returns false, and req->private_bio is still set, @@ -923,7 +929,7 @@ static int drbd_process_write_request(struct drbd_request *req) rcu_read_lock(); remote = drbd_should_do_remote(mdev->state); if (remote) { - conn_check_congested(mdev); + maybe_pull_ahead(mdev); remote = drbd_should_do_remote(mdev->state); } send_oos = drbd_should_send_out_of_sync(mdev->state); From bf709c8552bcbbbc66ecc11555a781e814a037d8 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:07:04 +0200 Subject: [PATCH 535/609] drbd: cleanup, remove two unused global flags The two unused "global flags" in 8.3 are "per volume" flags in 8.4. Still, they are unused, so lose them. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8b26a2c954d..b343875c9de 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -672,7 +672,6 @@ enum { /* flag bits per mdev */ enum { - UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. 
*/ @@ -681,7 +680,6 @@ enum { CRASHED_PRIMARY, /* This node was a crashed primary. * Gets cleared when the state.conn * goes into C_CONNECTED state. */ - NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ CONSIDER_RESYNC, MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ From 0c849666016cbf541c1030eec55f5f8dd1fba513 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:07:28 +0200 Subject: [PATCH 536/609] drbd: differentiate between normal and forced detach Aborting local requests (not waiting for completion from the lower level disk) is dangerous: if the master bio has been completed to upper layers, data pages may be re-used for other things already. If local IO is still pending and later completes, this may cause crashes or corrupt unrelated data. Only abort local IO if explicitly requested. Intended use case is a lower level device that turned into a tarpit, not completing io requests, not even doing error completion. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 4 ++-- drivers/block/drbd/drbd_int.h | 17 ++++++++++++++--- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 4 ++++ drivers/block/drbd/drbd_req.c | 4 ++-- drivers/block/drbd/drbd_state.c | 18 +++++++++++++++--- drivers/block/drbd/drbd_worker.c | 4 ++-- 8 files changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index f500dc5cdf5..209b2e063b9 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -445,7 +445,7 @@ _al_write_transaction(struct drbd_conf *mdev) /* drbd_chk_io_error done already */ else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { err = -EIO; - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); } else { /* advance ringbuffer position and transaction counter */ 
mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index b3d55d4b693..33626e34c92 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1135,7 +1135,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); err = -EIO; /* ctx->error ? */ } @@ -1260,7 +1260,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); if (ctx->error) - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); /* that should force detach, so the in memory bitmap will be * gone in a moment as well. */ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b343875c9de..963766bafab 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -689,6 +689,7 @@ enum { BITMAP_IO_QUEUED, /* Started bitmap IO */ GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ WAS_IO_ERROR, /* Local disk failed returned IO error */ + FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. 
*/ @@ -1653,8 +1654,16 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) return rv; } +enum drbd_force_detach_flags { + DRBD_IO_ERROR, + DRBD_META_IO_ERROR, + DRBD_FORCE_DETACH, +}; + #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) -static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) +static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, + enum drbd_force_detach_flags forcedetach, + const char *where) { enum drbd_io_error_p ep; @@ -1663,7 +1672,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, rcu_read_unlock(); switch (ep) { case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ - if (!forcedetach) { + if (forcedetach == DRBD_IO_ERROR) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); if (mdev->state.disk > D_INCONSISTENT) @@ -1674,6 +1683,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, case EP_DETACH: case EP_CALL_HELPER: set_bit(WAS_IO_ERROR, &mdev->flags); + if (forcedetach == DRBD_FORCE_DETACH) + set_bit(FORCE_DETACH, &mdev->flags); if (mdev->state.disk > D_FAILED) { _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); dev_err(DEV, @@ -1693,7 +1704,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, */ #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) static inline void drbd_chk_io_error_(struct drbd_conf *mdev, - int error, int forcedetach, const char *where) + int error, enum drbd_force_detach_flags forcedetach, const char *where) { if (error) { unsigned long flags; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c0acd86c841..849e5de9ea8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2866,7 +2866,7 @@ void drbd_md_sync(struct drbd_conf *mdev) if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { 
/* this was a try anyways ... */ dev_err(DEV, "meta data update failed!\n"); - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); } /* Update mdev->ldev->md.la_size_sect, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 323293e8887..d4c05e26a13 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1299,6 +1299,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * to realize a "hot spare" feature (not that I'd recommend that) */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + /* make sure there is no leftover from previous force-detach attempts */ + clear_bit(FORCE_DETACH, &mdev->flags); + /* allocation not in the IO path, drbdsetup context */ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { @@ -1683,6 +1686,7 @@ static int adm_detach(struct drbd_conf *mdev, int force) int ret; if (force) { + set_bit(FORCE_DETACH, &mdev->flags); drbd_force_state(mdev, NS(disk, D_FAILED)); retcode = SS_SUCCESS; goto out; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 891c3d41a27..e215dce4c69 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -490,7 +490,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); /* fall through. */ case WRITE_COMPLETED_WITH_ERROR: - __drbd_chk_io_error(mdev, false); + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA, no __drbd_chk_io_error in that case. 
*/ @@ -1210,7 +1210,7 @@ void request_timer_fn(unsigned long data) time_after(now, req->start_time + dt) && !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); - __drbd_chk_io_error(mdev, 1); + __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH); } nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; spin_unlock_irq(&tconn->req_lock); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 84a5072d737..c9ec7d37632 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1335,9 +1335,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - /* Immediately allow completion of all application IO, that waits - for completion from the local disk. */ - tl_abort_disk_io(mdev); + /* Immediately allow completion of all application IO, + * that waits for completion from the local disk, + * if this was a force-detach due to disk_timeout + * or administrator request (drbdsetup detach --force). + * Do NOT abort otherwise. + * Aborting local requests may cause serious problems, + * if requests are completed to upper layers already, + * and then later the already submitted local bio completes. + * This can cause DMA into former bio pages that meanwhile + * have been re-used for other things. + * So aborting local requests may cause crashes, + * or even worse, silent data corruption. 
+ */ + if (test_and_clear_bit(FORCE_DETACH, &mdev->flags)) + tl_abort_disk_io(mdev); /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 66be3910e8d..07a4046dd8c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -106,7 +106,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, false); + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); @@ -148,7 +148,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, false); + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (block_id == ID_SYNCER) From 6f3465ed82b10922effe364676103cbd4f2bcd81 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:08:25 +0200 Subject: [PATCH 537/609] drbd: report congestion if we are waiting for some userland callback If the drbd worker thread is synchronously waiting for some userland callback, we don't want some casual pageout to block on us. Have drbd_congested() report congestion in that case. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 ++++++ drivers/block/drbd/drbd_main.c | 16 ++++++++++++++++ drivers/block/drbd/drbd_nl.c | 9 ++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 963766bafab..e010afff336 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -815,6 +815,12 @@ enum { CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ STATE_SENT, /* Do not change state/UUIDs while this is set */ + CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC) + * pending, from drbd worker context. + * If set, bdi_write_congested() returns true, + * so shrink_page_list() would not recurse into, + * and potentially deadlock on, this drbd worker. + */ }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 849e5de9ea8..f2af74d0686 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2338,6 +2338,22 @@ static int drbd_congested(void *congested_data, int bdi_bits) goto out; } + if (test_bit(CALLBACK_PENDING, &mdev->tconn->flags)) { + r |= (1 << BDI_async_congested); + /* Without good local data, we would need to read from remote, + * and that would need the worker thread as well, which is + * currently blocked waiting for that usermode helper to + * finish. 
+ */ + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) + r |= (1 << BDI_sync_congested); + else + put_ldev(mdev); + r &= bdi_bits; + reason = 'c'; + goto out; + } + if (get_ldev(mdev)) { q = bdev_get_queue(mdev->ldev->backing_bdev); r = bdi_congested(&q->backing_dev_info, bdi_bits); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d4c05e26a13..05ed4804c72 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -323,11 +323,15 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) NULL }; char mb[12]; char *argv[] = {usermode_helper, cmd, mb, NULL }; + struct drbd_tconn *tconn = mdev->tconn; struct sib_info sib; int ret; + if (current == tconn->worker.task) + set_bit(CALLBACK_PENDING, &tconn->flags); + snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); - setup_khelper_env(mdev->tconn, envp); + setup_khelper_env(tconn, envp); /* The helper may take some time. * write out any unsynced meta data changes now */ @@ -350,6 +354,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) sib.helper_exit_code = ret; drbd_bcast_event(mdev, &sib); + if (current == tconn->worker.task) + clear_bit(CALLBACK_PENDING, &tconn->flags); + if (ret < 0) /* Ignore any ERRNOs we got. */ ret = 0; From 8a943170711b7a4d63528ea8eb6a41cc91e79309 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:09:36 +0200 Subject: [PATCH 538/609] drbd: reset congestion information before reporting it in /proc/drbd We cache the congestion status in mdev->congestion_reason whenever drbd_congested() was called. Reset this cached info before reporting it when reading /proc/drbd. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 64e0ddbf066..e0f0d2a6d53 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -242,6 +242,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v) mdev->state.role == R_SECONDARY) { seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { + /* reset mdev->congestion_reason */ + bdi_rw_congested(&mdev->rq_queue->backing_dev_info); + nc = rcu_dereference(mdev->tconn->net_conf); wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; seq_printf(seq, From a324896b173e569fb831c5caa04ccd02ec0bc9ca Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:10:41 +0200 Subject: [PATCH 539/609] drbd: do not reset rs_pending_cnt too early Fix asserts like block drbd0: in got_BlockAck:4634: rs_pending_cnt = -35 < 0 ! We reset the resync lru cache and related information (rs_pending_cnt), once we successfully finished a resync or online verify, or if the replication connection is lost. We also need to reset it if a resync or online verify is aborted because a lower level disk failed. In that case the replication link is still established, and we may still have packets queued in the network buffers which want to touch rs_pending_cnt. We do not have any synchronization mechanism to know for sure when all such pending resync related packets have been drained. To keep this counter from going negative (and violating the ASSERT that it will always be >= 0), just do not reset it when we lose a disk. It is good enough to make sure it is re-initialized before the next resync can start: reset it when we re-attach a disk. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++++ drivers/block/drbd/drbd_state.c | 11 +++++++---- drivers/block/drbd/drbd_worker.c | 8 -------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 05ed4804c72..a2925dedc23 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1309,6 +1309,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* make sure there is no leftover from previous force-detach attempts */ clear_bit(FORCE_DETACH, &mdev->flags); + /* and no leftover from previously aborted resync or verify, either */ + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + /* allocation not in the IO path, drbdsetup context */ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c9ec7d37632..ad307fb8dc2 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1216,6 +1216,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Do not change the order of the if above and the two below... */ if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */ + /* we probably will start a resync soon. + * make sure those things are properly reset. 
*/ + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + drbd_rs_cancel_all(mdev); + drbd_send_uuids(mdev); drbd_send_state(mdev, ns); } @@ -1386,10 +1393,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, "ASSERT FAILED: disk is %s while going diskless\n", drbd_disk_str(mdev->state.disk)); - mdev->rs_total = 0; - mdev->rs_failed = 0; - atomic_set(&mdev->rs_pending_cnt, 0); - if (ns.conn >= C_CONNECTED) drbd_send_state(mdev, ns); /* corresponding get_ldev in __drbd_set_state diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 07a4046dd8c..9d7e1fb0f43 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1549,14 +1549,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) return; } - if (mdev->state.conn < C_AHEAD) { - /* In case a previous resync run was aborted by an IO error/detach on the peer. */ - drbd_rs_cancel_all(mdev); - /* This should be done when we abort the resync. We definitely do not - want to have this for connections going back and forth between - Ahead/Behind and SyncSource/SyncTarget */ - } - if (!test_bit(B_RS_H_DONE, &mdev->flags)) { if (side == C_SYNC_TARGET) { /* Since application IO was locked out during C_WF_BITMAP_T and From 6f1a656325ea01638de44f8d40bb77c73181b275 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:11:01 +0200 Subject: [PATCH 540/609] drbd: call local-io-error handler early In case we want to hard-reset from the local-io-error handler, we need to call it before notifying the peer or aborting local IO. Otherwise the peer will advance its data generation UUIDs even if secondary. This way, local io error looks like a "regular" node crash, which reduces the number of different failure cases. This may be useful in a bigger picture where crashed or otherwise "misbehaving" nodes are automatically re-deployed. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ad307fb8dc2..444581828d7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1342,6 +1342,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + if (was_io_error && eh == EP_CALL_HELPER) + drbd_khelper(mdev, "local-io-error"); + /* Immediately allow completion of all application IO, * that waits for completion from the local disk, * if this was a force-detach due to disk_timeout @@ -1377,9 +1380,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_md_sync(mdev); } put_ldev(mdev); - - if (was_io_error && eh == EP_CALL_HELPER) - drbd_khelper(mdev, "local-io-error"); } /* second half of local IO error, failure to attach, From 970fbde1f1ebae0c85bbaed3de83684a58d60fad Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:11:38 +0200 Subject: [PATCH 541/609] drbd: flush drbd work queue before invalidate/invalidate remote If you do back to back wait-sync/invalidate on a Primary in a tight loop, during application IO load, you could trigger a race: kernel: block drbd6: FIXME going to queue 'set_n_write from StartingSync' but 'write from resync_finished' still pending? Fix this by changing the order of the drbd_queue_work() and the wake_up() in dec_ap_bio(), and adding the additional drbd_flush_workqueue() before requesting the full sync. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++++++---- drivers/block/drbd/drbd_nl.c | 8 ++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e010afff336..22adfc7189d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2251,15 +2251,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); D_ASSERT(ap_bio >= 0); + + if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { + if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); + } + /* this currently does wake_up for every dec_ap_bio! * maybe rather introduce some type of hysteresis? * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ if (ap_bio < mxb) wake_up(&mdev->misc_wait); - if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { - if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); - } } static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a2925dedc23..4afd626ca3d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2408,9 +2408,11 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; /* If there is still bitmap IO pending, probably because of a previous - * resync just being finished, wait for it before requesting a new resync. */ + * resync just being finished, wait for it before requesting a new resync. + * Also wait for it's after_state_ch(). 
*/ drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + drbd_flush_workqueue(mdev); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); @@ -2475,9 +2477,11 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; /* If there is still bitmap IO pending, probably because of a previous - * resync just being finished, wait for it before requesting a new resync. */ + * resync just being finished, wait for it before requesting a new resync. + * Also wait for it's after_state_ch(). */ drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + drbd_flush_workqueue(mdev); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); if (retcode < SS_SUCCESS) { From 58ffa580a748dd16b1e5ab260bea39cdbd1e94ef Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 26 Jul 2012 14:09:49 +0200 Subject: [PATCH 542/609] drbd: introduce stop-sector to online verify We now can schedule only a specific range of sectors for online verify, or interrupt a running verify without interrupting the connection. Had to bump the protocol version differently, we are now 101. Added verify_can_do_stop_sector() { protocol >= 97 && protocol != 100; } Also, the return value convention for worker callbacks has changed, we returned "true/false" for "keep the connection up" in 8.3, we return 0 for success and < 0 for failure in 8.4. 
Affected: receive_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 +++++++ drivers/block/drbd/drbd_nl.c | 14 ++++++++----- drivers/block/drbd/drbd_proc.c | 12 ++++++++--- drivers/block/drbd/drbd_receiver.c | 10 ++++++++- drivers/block/drbd/drbd_state.c | 17 +++++++++++---- drivers/block/drbd/drbd_worker.c | 33 ++++++++++++++++++++++++------ include/linux/drbd.h | 2 +- include/linux/drbd_genl.h | 1 + 8 files changed, 76 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 22adfc7189d..eddc4388a1b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -971,6 +971,7 @@ struct drbd_conf { /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; + sector_t ov_stop_sector; /* where are we now? (sector) */ sector_t ov_position; /* Start sector of out of sync range (to merge printk reporting). */ @@ -2264,6 +2265,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); } +static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev) +{ + return mdev->tconn->agreed_pro_version >= 97 && + mdev->tconn->agreed_pro_version != 100; +} + static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) { int changed = mdev->ed_uuid != val; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4afd626ca3d..eefb56308ae 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2939,6 +2939,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) { struct drbd_conf *mdev; enum drbd_ret_code retcode; + struct start_ov_parms parms; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -2947,19 +2948,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) goto out; mdev = adm_ctx.mdev; + + /* resume from last known position, if possible */ + parms.ov_start_sector = 
mdev->ov_start_sector; + parms.ov_stop_sector = ULLONG_MAX; if (info->attrs[DRBD_NLA_START_OV_PARMS]) { - /* resume from last known position, if possible */ - struct start_ov_parms parms = - { .ov_start_sector = mdev->ov_start_sector }; int err = start_ov_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto out; } - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); + mdev->ov_stop_sector = parms.ov_stop_sector; + /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. */ drbd_suspend_io(mdev); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index e0f0d2a6d53..56672a61eb9 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) * we convert to sectors in the display below. */ unsigned long bm_bits = drbd_bm_bits(mdev); unsigned long bit_pos; + unsigned long long stop_sector = 0; if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) + mdev->state.conn == C_VERIFY_T) { bit_pos = bm_bits - mdev->ov_left; - else + if (verify_can_do_stop_sector(mdev)) + stop_sector = mdev->ov_stop_sector; + } else bit_pos = mdev->bm_resync_fo; /* Total sectors may be slightly off for oddly * sized devices. So what. 
*/ seq_printf(seq, - "\t%3d%% sector pos: %llu/%llu\n", + "\t%3d%% sector pos: %llu/%llu", (int)(bit_pos / (bm_bits/100+1)), (unsigned long long)bit_pos * BM_SECT_PER_BIT, (unsigned long long)bm_bits * BM_SECT_PER_BIT); + if (stop_sector != 0 && stop_sector != ULLONG_MAX) + seq_printf(seq, " stop sector: %llu", stop_sector); + seq_printf(seq, "\n"); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7fe6b01618d..8fddec96dfb 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3843,7 +3843,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) * already decided to close the connection again, * we must not "re-establish" it here. */ if (os.conn <= C_TEAR_DOWN) - return false; + return -ECONNRESET; /* If this is the "end of sync" confirmation, usually the peer disk * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits @@ -3875,6 +3875,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } } + /* explicit verify finished notification, stop sector reached. */ + if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && + peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { + ov_out_of_sync_print(mdev); + drbd_resync_finished(mdev); + return 0; + } + /* peer says his disk is inconsistent, while we think it is uptodate, * and this happens while the peer still thinks we have a sync going on, * but we think we are already done with the sync. diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 444581828d7..12f2b4fbe55 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -975,13 +975,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, wake_up(&mdev->state_wait); wake_up(&mdev->tconn->ping_wait); - /* aborted verify run. log the last position */ + /* Aborted verify run, or we reached the stop sector. 
+ * Log the last position, unless end-of-device. */ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { + ns.conn <= C_CONNECTED) { mdev->ov_start_sector = BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); + if (mdev->ov_left) + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); } if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && @@ -1422,6 +1424,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) drbd_send_state(mdev, ns); + /* Verify finished, or reached stop sector. Peer did not know about + * the stop sector, and we may even have changed the stop sector during + * verify to interrupt/stop early. Send the new state. */ + if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED + && verify_can_do_stop_sector(mdev)) + drbd_send_state(mdev, ns); + /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9d7e1fb0f43..1c9c6fd332c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -692,6 +692,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + bool stop_sector_reached = false; if (unlikely(cancel)) return 1; @@ -700,9 +701,17 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) sector = mdev->ov_position; for (i = 0; i < number; i++) { - if (sector >= capacity) { + if (sector >= capacity) return 1; - } + + /* We check for "finished" only in the reply path: + * 
w_e_end_ov_reply(). + * We need to send at least one request out. */ + stop_sector_reached = i > 0 + && verify_can_do_stop_sector(mdev) + && sector >= mdev->ov_stop_sector; + if (stop_sector_reached) + break; size = BM_BLOCK_SIZE; @@ -726,7 +735,8 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) requeue: mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); - mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + if (i == 0 || !stop_sector_reached) + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); return 1; } @@ -792,7 +802,12 @@ int drbd_resync_finished(struct drbd_conf *mdev) dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; if (dt <= 0) dt = 1; + db = mdev->rs_total; + /* adjust for verify start and stop sectors, respective reached position */ + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + db -= mdev->ov_left; + dbdt = Bit2KB(db/dt); mdev->rs_paused /= HZ; @@ -815,7 +830,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns.conn = C_CONNECTED; dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", - verify_done ? "Online verify " : "Resync", + verify_done ? 
"Online verify" : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); n_oos = drbd_bm_total_weight(mdev); @@ -896,7 +911,9 @@ out: mdev->rs_total = 0; mdev->rs_failed = 0; mdev->rs_paused = 0; - if (verify_done) + + /* reset start sector, if we reached end of device */ + if (verify_done && mdev->ov_left == 0) mdev->ov_start_sector = 0; drbd_md_sync(mdev); @@ -1144,6 +1161,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) unsigned int size = peer_req->i.size; int digest_size; int err, eq = 0; + bool stop_sector_reached = false; if (unlikely(cancel)) { drbd_free_peer_req(mdev, peer_req); @@ -1194,7 +1212,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) if ((mdev->ov_left & 0x200) == 0x200) drbd_advance_rs_marks(mdev, mdev->ov_left); - if (mdev->ov_left == 0) { + stop_sector_reached = verify_can_do_stop_sector(mdev) && + (sector + (size>>9)) >= mdev->ov_stop_sector; + + if (mdev->ov_left == 0 || stop_sector_reached) { ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 36ae7dd28d9..5171c353088 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -55,7 +55,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 100 +#define PRO_VERSION_MAX 101 enum drbd_io_error_p { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 92ec4b50a88..9430e9ab37a 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -215,6 +215,7 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) + __u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector) ) GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, From 81a3537a9730116dfaafeed728f2d460c872c0d5 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:00:54 +0200 Subject: [PATCH 
543/609] drbd: announce FLUSH/FUA capability to upper layers In 8.4, we may have bios spanning two activity log extents. Fixup drbd_al_begin_io() and drbd_al_complete_io() to deal with zero sized bios. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 12 +++++++++--- drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 22 ++++++++++++++++------ drivers/block/drbd/drbd_req.c | 1 - 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 209b2e063b9..e81085795ec 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -248,11 +248,12 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); - unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); unsigned enr; bool locked = false; + D_ASSERT(first <= last); D_ASSERT(atomic_read(&mdev->local_cnt) > 0); for (enr = first; enr <= last; enr++) @@ -305,11 +306,12 @@ void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); - unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned last = i->size == 0 ? 
first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); unsigned enr; struct lc_element *extent; unsigned long flags; + D_ASSERT(first <= last); spin_lock_irqsave(&mdev->al_lock, flags); for (enr = first; enr <= last; enr++) { @@ -756,7 +758,11 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, unsigned int enr, count = 0; struct lc_element *e; - if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { + /* this should be an empty REQ_FLUSH */ + if (size == 0) + return 0; + + if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "sector: %llus, size: %d\n", (unsigned long long)sector, size); return 0; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f2af74d0686..85d95ec405e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2640,6 +2640,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, q->backing_dev_info.congested_data = mdev; blk_queue_make_request(q, drbd_make_request); + blk_queue_flush(q, REQ_FLUSH | REQ_FUA); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8fddec96dfb..a6f0b293836 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -295,6 +295,9 @@ static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_ne atomic_t *a = is_net ? 
&mdev->pp_in_use_by_net : &mdev->pp_in_use; int i; + if (page == NULL) + return; + if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count) i = page_chain_free(page); else { @@ -331,7 +334,7 @@ drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { struct drbd_peer_request *peer_req; - struct page *page; + struct page *page = NULL; unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) @@ -344,9 +347,11 @@ drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, return NULL; } - page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); - if (!page) - goto fail; + if (data_size) { + page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); + if (!page) + goto fail; + } drbd_clear_interval(&peer_req->i); peer_req->i.size = data_size; @@ -1513,8 +1518,6 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, data_size -= dgs; } - if (!expect(data_size != 0)) - return NULL; if (!expect(IS_ALIGNED(data_size, 512))) return NULL; if (!expect(data_size <= DRBD_MAX_BIO_SIZE)) @@ -1537,6 +1540,9 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, if (!peer_req) return NULL; + if (!data_size) + return peer_req; + ds = data_size; page = peer_req->pages; page_chain_for_each(page) { @@ -2199,6 +2205,10 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) dp_flags = be32_to_cpu(p->dp_flags); rw |= wire_flags_to_bio(mdev, dp_flags); + if (peer_req->pages == NULL) { + D_ASSERT(peer_req->i.size == 0); + D_ASSERT(dp_flags & DP_FLUSH); + } if (dp_flags & DP_MAY_SET_IN_SYNC) peer_req->flags |= EE_MAY_SET_IN_SYNC; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e215dce4c69..8323449fbba 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1097,7 +1097,6 @@ int drbd_make_request(struct request_queue *q, struct bio 
*bio) /* * what we "blindly" assume: */ - D_ASSERT(bio->bi_size > 0); D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); inc_ap_bio(mdev); From 3174f8c5045ad247563434c4b4897bd89313eafc Mon Sep 17 00:00:00 2001 From: Philipp Marek Date: Sat, 3 Mar 2012 21:04:30 +0100 Subject: [PATCH 544/609] drbd: pass some more information to userspace. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 11 ++++++++++- include/linux/drbd_genl.h | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index eefb56308ae..466d6b1d930 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2666,7 +2666,16 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || nla_put_u32(skb, T_current_state, mdev->state.i) || nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) || - nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev))) + nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) || + nla_put_u64(skb, T_send_cnt, mdev->send_cnt) || + nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) || + nla_put_u64(skb, T_read_cnt, mdev->read_cnt) || + nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) || + nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) || + nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) || + nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) || + nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) || + nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt))) goto nla_put_failure; if (got_ldev) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 9430e9ab37a..d0d8fac8a6e 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -211,6 +211,16 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, /* for pre and post notifications of helper execution */ 
__str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) + + __u64_field(15, 0, send_cnt) + __u64_field(16, 0, recv_cnt) + __u64_field(17, 0, read_cnt) + __u64_field(18, 0, writ_cnt) + __u64_field(19, 0, al_writ_cnt) + __u64_field(20, 0, bm_writ_cnt) + __u32_field(21, 0, ap_bio_cnt) + __u32_field(22, 0, ap_pending_cnt) + __u32_field(23, 0, rs_pending_cnt) ) GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, From b666dbf819f9157a4afef2094ec961d216d802b5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 26 Jul 2012 14:12:59 +0200 Subject: [PATCH 545/609] drbd: Remove redundant and wrong test for NULL simplification in conn_connect() Since the drbd_socket_okay() function itself tests if the socket is NULL, the explicit test "if (sock.socket && &msock.socket)" was redundant. Apart from that the address operator ('&') before msock.socket rendered the test pointless. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a6f0b293836..855cadfe614 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -985,13 +985,9 @@ retry: goto out_release_sockets; } - if (sock.socket && &msock.socket) { - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; - if (ok) - break; - } - } while (1); + ok = drbd_socket_okay(&sock.socket); + ok = drbd_socket_okay(&msock.socket) && ok; + } while (!ok); if (ad.s_listen) sock_release(ad.s_listen); From 92f14951c044198306f098e76c56a944cf88867a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 1 Aug 2012 11:41:01 +0200 Subject: [PATCH 546/609] drbd: Try to connect to peer only once per cycle Since now our listening socket is open all the time we will get connection tries of the peer always in.
No need to try it three times. This is valid when connecting to older peers as well, it simply increases the probability that the new version DRBD will accept a connection instead that it will establish one. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 855cadfe614..1567e9bb9bd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -881,7 +881,7 @@ static int conn_connect(struct drbd_tconn *tconn) struct drbd_socket sock, msock; struct drbd_conf *mdev; struct net_conf *nc; - int vnr, timeout, try, h, ok; + int vnr, timeout, h, ok; bool discard_my_data; enum drbd_state_rv rv; struct accept_wait_data ad = { @@ -912,15 +912,7 @@ static int conn_connect(struct drbd_tconn *tconn) do { struct socket *s; - for (try = 0;;) { - /* 3 tries, this should take less than a second! */ - s = drbd_try_connect(tconn); - if (s || ++try >= 3) - break; - /* give the other side time to call bind() & listen() */ - schedule_timeout_interruptible(HZ / 10); - } - + s = drbd_try_connect(tconn); if (s) { if (!sock.socket) { sock.socket = s; @@ -949,10 +941,10 @@ static int conn_connect(struct drbd_tconn *tconn) retry: s = drbd_wait_for_connect(tconn, &ad); if (s) { - try = receive_first_packet(tconn, s); + int fp = receive_first_packet(tconn, s); drbd_socket_okay(&sock.socket); drbd_socket_okay(&msock.socket); - switch (try) { + switch (fp) { case P_INITIAL_DATA: if (sock.socket) { conn_warn(tconn, "initial packet S crossed\n"); From 80c6eed49d5da3ba97cff4dc316ff051486cd1fc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 1 Aug 2012 14:53:39 +0200 Subject: [PATCH 547/609] drbd: More random to the connect logic Since the listening socket is open all the time, it was possible to get into stable "initial packet S crossed" loops. 
* when both sides realize in the drbd_socket_okay() call at the end of the loop that the other side closed the main socket you had the chance to get into a stable loop with repeated "packet S crossed" messages. * when both sides do not realize with the drbd_socket_okay() call at the end of the loop that the other side closed the main socket you had the chance to get into a stable loop with alternating "packet S crossed" "packet M crossed" messages. In order to break out these stable loops randomize the behaviour if such a crossing of P_INITIAL_DATA or P_INITIAL_META packets is detected. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1567e9bb9bd..26c30fd64ec 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -949,20 +949,25 @@ retry: if (sock.socket) { conn_warn(tconn, "initial packet S crossed\n"); sock_release(sock.socket); + sock.socket = s; + goto randomize; } sock.socket = s; break; case P_INITIAL_META: + set_bit(DISCARD_CONCURRENT, &tconn->flags); if (msock.socket) { conn_warn(tconn, "initial packet M crossed\n"); sock_release(msock.socket); + msock.socket = s; + goto randomize; } msock.socket = s; - set_bit(DISCARD_CONCURRENT, &tconn->flags); break; default: conn_warn(tconn, "Error receiving initial packet\n"); sock_release(s); +randomize: if (random32() & 1) goto retry; } From 519b6d3eac823e4ceec10484bc06f239047cebbf Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 3 Aug 2012 02:19:09 +0200 Subject: [PATCH 548/609] drbd: fix drbd wire compatibility for empty flushes DRBD has a concept of request epochs or reorder-domains, which are separated on the wire by P_BARRIER packets. Older DRBD is not able to handle zero-sized requests at all, so we need to map empty flushes to these drbd barriers. 
These are the equivalent of empty flushes, and by default trigger flushes on the receiving side anyways (unless not supported or explicitly disabled), so there is no need to handle this differently in newer drbd either. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8323449fbba..a9111b68fe2 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -935,6 +935,20 @@ static int drbd_process_write_request(struct drbd_request *req) send_oos = drbd_should_send_out_of_sync(mdev->state); rcu_read_unlock(); + /* Need to replicate writes. Unless it is an empty flush, + * which is better mapped to a DRBD P_BARRIER packet, + * also for drbd wire protocol compatibility reasons. + * If this was a flush, just start a new epoch. + * Unless the current epoch was empty anyways, or we are not currently + * replicating, in which case there is no point. */ + if (unlikely(req->i.size == 0)) { + /* The only size==0 bios we expect are empty flushes. */ + D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH); + if (remote && mdev->tconn->current_tle_writes) + start_new_tl_epoch(mdev->tconn); + return 0; + } + if (!remote && !send_oos) return 0; @@ -1004,8 +1018,10 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long * extent. This waits for any resync activity in the corresponding * resync extent to finish, and, if necessary, pulls in the target * extent into the activity log, which involves further disk io because - * of transactional on-disk meta data updates. */ - if (rw == WRITE && req->private_bio + * of transactional on-disk meta data updates. + * Empty flushes don't need to go into the activity log, they can only + * flush data for pending writes which are already in there. 
*/ + if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &mdev->flags)) { req->rq_state |= RQ_IN_ACT_LOG; drbd_al_begin_io(mdev, &req->i); @@ -1047,7 +1063,10 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long if (rw == WRITE) mdev->tconn->current_tle_writes++; - list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log); + /* no point in adding empty flushes to the transfer log, + * they are mapped to drbd barriers already. */ + if (likely(req->i.size!=0)) + list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log); if (rw == WRITE) { if (!drbd_process_write_request(req)) From e959d08d3ef6e48bc3b726bcc07a4ebb9989be94 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sat, 4 Aug 2012 01:07:55 +0200 Subject: [PATCH 549/609] drbd: Fix a potential issue with the DISCARD_CONCURRENT flag The DISCARD_CONCURRENT flag should be set on one node and cleared on the other node. As the code was before, it was theoretically possible that a node accepts the meta socket, but has to close it later on, and keeps the DISCARD_CONCURRENT flag. Correct this by moving the clear_bit(DISCARD_CONCURRENT) where the packet gets sent. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 26c30fd64ec..d507700df85 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -901,8 +901,6 @@ static int conn_connect(struct drbd_tconn *tconn) msock.rbuf = tconn->meta.rbuf; msock.socket = NULL; - clear_bit(DISCARD_CONCURRENT, &tconn->flags); - /* Assume that the peer only understands protocol 80 until we know better.
*/ tconn->agreed_pro_version = 80; @@ -918,6 +916,7 @@ static int conn_connect(struct drbd_tconn *tconn) sock.socket = s; send_first_packet(tconn, &sock, P_INITIAL_DATA); } else if (!msock.socket) { + clear_bit(DISCARD_CONCURRENT, &tconn->flags); msock.socket = s; send_first_packet(tconn, &msock, P_INITIAL_META); } else { From 8a0bab2a6dd05ee35777370be892daba01750712 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 7 Aug 2012 13:28:00 +0200 Subject: [PATCH 550/609] drbd: Finish requests that completed while IO was frozen Requests of an acked epoch are stored on the barrier_acked_requests list. In case the private bio of such a request completes while IO on the drbd device is suspended [req_mod(completed_ok)] then the request stays there. When thawing IO because the fence_peer handler returned, then we use tl_clear() to apply the connection_lost_while_pending event to all requests on the transfer-log and the barrier_acked_requests list. Up to now the connection_lost_while_pending event was not applied on requests on the barrier_acked_requests list. Fixed that. I.e. now the connection_lost_while_pending and resend events are applied to requests on the barrier_acked_requests list. For that it is necessary that the resend event finishes (local only) READS correctly. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 6 ++++++ drivers/block/drbd/drbd_state.c | 12 +++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index a9111b68fe2..6e8d0976351 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -683,6 +683,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case RESEND: + /* Simply complete (local only) READs. 
*/ + if (!(req->rq_state & RQ_WRITE) && !req->w.cb) { + mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0); + break; + } + /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK before the connection loss (B&C only); only P_BARRIER_ACK (or the local completion?) was missing when we suspended. diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 12f2b4fbe55..c16349aec23 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1503,7 +1503,6 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) if (ns_max.susp_fen) { /* case1: The outdate peer handler is successful: */ if (ns_max.pdsk <= D_OUTDATED) { - tl_clear(tconn); rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { if (test_bit(NEW_CUR_UUID, &mdev->flags)) { @@ -1512,10 +1511,13 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) } } rcu_read_unlock(); - conn_request_state(tconn, - (union drbd_state) { { .susp_fen = 1 } }, - (union drbd_state) { { .susp_fen = 0 } }, - CS_VERBOSE); + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); + _conn_request_state(tconn, + (union drbd_state) { { .susp_fen = 1 } }, + (union drbd_state) { { .susp_fen = 0 } }, + CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); } /* case2: The connection was established again: */ if (ns_min.conn >= C_CONNECTED) { From 99b4d8fe6d12dcb49126df7153961fbc4de05f2a Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 7 Aug 2012 06:42:09 +0200 Subject: [PATCH 551/609] drbd: only start a new epoch, if the current epoch contains writes Almost all code paths calling start_new_tl_epoch() guarded it with if (... current_tle_writes > 0 ... ). Just move that inside start_new_tl_epoch(). 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 6e8d0976351..0c1b6327e20 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -162,6 +162,10 @@ static void wake_all_senders(struct drbd_tconn *tconn) { /* must hold resource->req_lock */ static void start_new_tl_epoch(struct drbd_tconn *tconn) { + /* no point closing an epoch, if it is empty, anyways. */ + if (tconn->current_tle_writes == 0) + return; + tconn->current_tle_writes = 0; atomic_inc(&tconn->current_tle_nr); wake_all_senders(tconn); @@ -861,9 +865,8 @@ static void maybe_pull_ahead(struct drbd_conf *mdev) } if (congested) { - if (mdev->tconn->current_tle_writes) - /* start a new epoch for non-mirrored writes */ - start_new_tl_epoch(mdev->tconn); + /* start a new epoch for non-mirrored writes */ + start_new_tl_epoch(mdev->tconn); if (on_congestion == OC_PULL_AHEAD) _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); @@ -950,7 +953,7 @@ static int drbd_process_write_request(struct drbd_request *req) if (unlikely(req->i.size == 0)) { /* The only size==0 bios we expect are empty flushes. */ D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH); - if (remote && mdev->tconn->current_tle_writes) + if (remote) start_new_tl_epoch(mdev->tconn); return 0; } @@ -1066,13 +1069,15 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long /* which transfer log epoch does this belong to? */ req->epoch = atomic_read(&mdev->tconn->current_tle_nr); - if (rw == WRITE) - mdev->tconn->current_tle_writes++; /* no point in adding empty flushes to the transfer log, * they are mapped to drbd barriers already. 
*/ - if (likely(req->i.size!=0)) + if (likely(req->i.size!=0)) { + if (rw == WRITE) + mdev->tconn->current_tle_writes++; + list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log); + } if (rw == WRITE) { if (!drbd_process_write_request(req)) From 46e21bbadb2f4770199f2343f311f8833e5bf495 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 7 Aug 2012 06:47:14 +0200 Subject: [PATCH 552/609] drbd: NEG_ACK does not imply a barrier-ack Don't drop a request from the transfer log just because it was NEG_ACKED. We need it around to be able to verify P_BARRIER_ACKs against the transfer log. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 0c1b6327e20..67768570141 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -660,7 +660,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case NEG_ACKED: - mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, RQ_NET_DONE); + mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, 0); break; case FAIL_FROZEN_DISK_IO: From 232fd3f4a01f45d5402c2e03f7c8815d80bec25b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 1 Aug 2012 12:30:26 +0200 Subject: [PATCH 553/609] drbd: cleanup, drop unused struct Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eddc4388a1b..e5c98533c90 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -467,12 +467,6 @@ struct p_drbd06_param { u32 bit_map_gen[5]; } __packed; -struct p_discard { - u64 block_id; - u32 seq_num; - u32 pad; -} __packed; - struct p_block_desc { u64 sector; u32 blksize; From d4dabbe22d38752674c582e5bb40716a72397355 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date:
Wed, 1 Aug 2012 12:33:51 +0200 Subject: [PATCH 554/609] drbd: disambiguation, s/P_DISCARD_WRITE/P_SUPERSEDED/ To avoid confusion with REQ_DISCARD aka TRIM, rename our "discard concurrent write acks" from P_DISCARD_WRITE to P_SUPERSEDED. At the same time, rename the drbd request event DISCARD_WRITE to CONFLICT_RESOLVED. It already triggers both successful completion or restart of the request, depending on our RQ_POSTPONED flag. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 33 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 6 +++--- drivers/block/drbd/drbd_req.h | 2 +- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e5c98533c90..91b73247f65 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -197,7 +197,7 @@ enum drbd_packet { P_RECV_ACK = 0x15, /* Used in protocol B */ P_WRITE_ACK = 0x16, /* Used in protocol C */ P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ - P_DISCARD_WRITE = 0x18, /* Used in proto C, two-primaries conflict detection */ + P_SUPERSEDED = 0x18, /* Used in proto C, two-primaries conflict detection */ P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... 
*/ @@ -335,7 +335,7 @@ struct p_data { * commands which share a struct: * p_block_ack: * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), - * P_DISCARD_WRITE (proto C, two-primaries conflict detection) + * P_SUPERSEDED (proto C, two-primaries conflict detection) * p_block_req: * P_DATA_REQUEST, P_RS_DATA_REQUEST */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 85d95ec405e..328ce80fecf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3339,7 +3339,7 @@ const char *cmdname(enum drbd_packet cmd) [P_RECV_ACK] = "RecvAck", [P_WRITE_ACK] = "WriteAck", [P_RS_WRITE_ACK] = "RSWriteAck", - [P_DISCARD_WRITE] = "DiscardWrite", + [P_SUPERSEDED] = "Superseded", [P_NEG_ACK] = "NegAck", [P_NEG_DREPLY] = "NegDReply", [P_NEG_RS_DREPLY] = "NegRSDReply", diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d507700df85..afab66bbc31 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -425,7 +425,7 @@ static int drbd_finish_peer_reqs(struct drbd_conf *mdev) drbd_free_net_peer_req(mdev, peer_req); /* possible callbacks here: - * e_end_block, and e_end_resync_block, e_send_discard_write. + * e_end_block, and e_end_resync_block, e_send_superseded. * all ignore the last argument. */ list_for_each_entry_safe(peer_req, t, &work_list, w.list) { @@ -1803,7 +1803,7 @@ static void restart_conflicting_writes(struct drbd_conf *mdev, continue; /* as it is RQ_POSTPONED, this will cause it to * be queued on the retry workqueue. 
*/ - __req_mod(req, DISCARD_WRITE, NULL); + __req_mod(req, CONFLICT_RESOLVED, NULL); } } @@ -1864,9 +1864,9 @@ static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) return err; } -static int e_send_discard_write(struct drbd_work *w, int unused) +static int e_send_superseded(struct drbd_work *w, int unused) { - return e_send_ack(w, P_DISCARD_WRITE); + return e_send_ack(w, P_SUPERSEDED); } static int e_send_retry_write(struct drbd_work *w, int unused) @@ -1874,7 +1874,7 @@ static int e_send_retry_write(struct drbd_work *w, int unused) struct drbd_tconn *tconn = w->mdev->tconn; return e_send_ack(w, tconn->agreed_pro_version >= 100 ? - P_RETRY_WRITE : P_DISCARD_WRITE); + P_RETRY_WRITE : P_SUPERSEDED); } static bool seq_greater(u32 a, u32 b) @@ -2082,11 +2082,11 @@ static int handle_write_conflicts(struct drbd_conf *mdev, if (resolve_conflicts) { /* * If the peer request is fully contained within the - * overlapping request, it can be discarded; otherwise, - * it will be retried once all overlapping requests - * have completed. + * overlapping request, it can be considered overwritten + * and thus superseded; otherwise, it will be retried + * once all overlapping requests have completed. */ - bool discard = i->sector <= sector && i->sector + + bool superseded = i->sector <= sector && i->sector + (i->size >> 9) >= sector + (size >> 9); if (!equal) @@ -2095,10 +2095,10 @@ static int handle_write_conflicts(struct drbd_conf *mdev, "assuming %s came first\n", (unsigned long long)i->sector, i->size, (unsigned long long)sector, size, - discard ? "local" : "remote"); + superseded ? "local" : "remote"); inc_unacked(mdev); - peer_req->w.cb = discard ? e_send_discard_write : + peer_req->w.cb = superseded ? 
e_send_superseded : e_send_retry_write; list_add_tail(&peer_req->w.list, &mdev->done_ee); wake_asender(mdev->tconn); @@ -2119,8 +2119,9 @@ static int handle_write_conflicts(struct drbd_conf *mdev, !(req->rq_state & RQ_POSTPONED)) { /* * Wait for the node with the discard flag to - * decide if this request will be discarded or - * retried. Requests that are discarded will + * decide if this request has been superseded + * or needs to be retried. + * Requests that have been superseded will * disappear from the write_requests tree. * * In addition, wait for the conflicting @@ -4994,8 +4995,8 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) case P_RECV_ACK: what = RECV_ACKED_BY_PEER; break; - case P_DISCARD_WRITE: - what = DISCARD_WRITE; + case P_SUPERSEDED: + what = CONFLICT_RESOLVED; break; case P_RETRY_WRITE: what = POSTPONE_WRITE; @@ -5220,7 +5221,7 @@ static struct asender_cmd asender_tbl[] = { [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck }, [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 67768570141..266ef24b3c7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -611,13 +611,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, RQ_NET_DONE); break; - case DISCARD_WRITE: - /* for discarded conflicting writes of multiple primaries, + case CONFLICT_RESOLVED: + /* for superseded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered 
by the activity log. * * If this request had been marked as RQ_POSTPONED before, - * it will actually not be discarded, but "restarted", + * it will actually not be completed, but "restarted", * resubmitted from the retry worker context. */ D_ASSERT(req->rq_state & RQ_NET_PENDING); D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 9611713c358..016de6b8bb5 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -97,7 +97,7 @@ enum drbd_req_event { RECV_ACKED_BY_PEER, WRITE_ACKED_BY_PEER, WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ - DISCARD_WRITE, + CONFLICT_RESOLVED, POSTPONE_WRITE, NEG_ACKED, BARRIER_ACKED, /* in protocol A and B */ From 427c0434fc52f15bd762ddc6611e874df6cc5b8d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 1 Aug 2012 12:43:01 +0200 Subject: [PATCH 555/609] drbd: disambiguation, s/DISCARD_CONCURRENT/RESOLVE_CONFLICTS/ We don't discard anything here, really. We resolve conflicting, concurrent writes to overlapping data blocks. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 91b73247f65..813f50dbe5c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -799,7 +799,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size); /* flag bits per tconn */ enum { NET_CONGESTED, /* The data socket is congested */ - DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ + RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! 
*/ SEND_PING, /* whether asender should send a ping asap */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 466d6b1d930..35bb572a207 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2249,7 +2249,7 @@ void resync_after_online_grow(struct drbd_conf *mdev) if (mdev->state.role != mdev->state.peer) iass = (mdev->state.role == R_PRIMARY); else - iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); + iass = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags); if (iass) drbd_start_resync(mdev, C_SYNC_SOURCE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index afab66bbc31..99121076bf1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -916,7 +916,7 @@ static int conn_connect(struct drbd_tconn *tconn) sock.socket = s; send_first_packet(tconn, &sock, P_INITIAL_DATA); } else if (!msock.socket) { - clear_bit(DISCARD_CONCURRENT, &tconn->flags); + clear_bit(RESOLVE_CONFLICTS, &tconn->flags); msock.socket = s; send_first_packet(tconn, &msock, P_INITIAL_META); } else { @@ -954,7 +954,7 @@ retry: sock.socket = s; break; case P_INITIAL_META: - set_bit(DISCARD_CONCURRENT, &tconn->flags); + set_bit(RESOLVE_CONFLICTS, &tconn->flags); if (msock.socket) { conn_warn(tconn, "initial packet M crossed\n"); sock_release(msock.socket); @@ -1899,7 +1899,7 @@ static bool need_peer_seq(struct drbd_conf *mdev) /* * We only need to keep track of the last packet_seq number of our peer - * if we are in dual-primary mode and we have the discard flag set; see + * if we are in dual-primary mode and we have the resolve-conflicts flag set; see * handle_write_conflicts(). 
*/ @@ -1907,7 +1907,7 @@ static bool need_peer_seq(struct drbd_conf *mdev) tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; rcu_read_unlock(); - return tp && test_bit(DISCARD_CONCURRENT, &tconn->flags); + return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags); } static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) @@ -2048,7 +2048,7 @@ static int handle_write_conflicts(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) { struct drbd_tconn *tconn = mdev->tconn; - bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags); + bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags); sector_t sector = peer_req->i.sector; const unsigned int size = peer_req->i.size; struct drbd_interval *i; @@ -2600,7 +2600,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) "Using discard-least-changes instead\n"); case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { - rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) + rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) ? -1 : 1; break; } else { @@ -2616,7 +2616,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) rv = 1; else /* ( ch_self == ch_peer ) */ /* Well, then use something else. */ - rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) + rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) ? -1 : 1; break; case ASB_DISCARD_LOCAL: @@ -2839,7 +2839,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ - dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); + dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags); return dc ? 
-1 : 1; } } @@ -3775,7 +3775,7 @@ static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi) mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && + if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) && mutex_is_locked(mdev->state_mutex)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return 0; @@ -3801,7 +3801,7 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info * mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(DISCARD_CONCURRENT, &tconn->flags) && + if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) && mutex_is_locked(&tconn->cstate_mutex)) { conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG); return 0; From eb12010e9af119c84e6b2214064a98681027e0e3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 1 Aug 2012 12:46:20 +0200 Subject: [PATCH 556/609] drbd: disambiguation, s/ERR_DISCARD/ERR_DISCARD_IMPOSSIBLE/ If for some reason (typically "split-brained" cluster manager) drbd replica data has diverged, we can chose a victim, and reconnect using "--discard-my-data", causing the victim to become sync-target, fetching all changed blocks from the peer. If we are Primary, we are potentially in use, and we refuse to "roll back" changes to the data below the page cache and other users. Rename the error symbol for this to ERR_DISCARD_IMPOSSIBLE. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- include/linux/drbd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 35bb572a207..d1073705bf1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1829,7 +1829,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n return ERR_STONITH_AND_PROT_A; } if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) - return ERR_DISCARD; + return ERR_DISCARD_IMPOSSIBLE; } if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 5171c353088..0b93e5e2e06 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -136,7 +136,7 @@ enum drbd_ret_code { ERR_AUTH_ALG = 120, ERR_AUTH_ALG_ND = 121, ERR_NOMEM = 122, - ERR_DISCARD = 123, + ERR_DISCARD_IMPOSSIBLE = 123, ERR_DISK_CONFIGURED = 124, ERR_NET_CONFIGURED = 125, ERR_MANDATORY_TAG = 126, From 715306f69d85f7ea21eaef4efe75b8364cfea1d5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 10 Aug 2012 17:00:30 +0200 Subject: [PATCH 557/609] drbd: Don't unregister socket state_change callback from within the callback Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 31 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 99121076bf1..14df37ccd52 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -679,21 +679,15 @@ struct accept_wait_data { }; -static void incomming_connection(struct sock *sk) +static void drbd_incoming_connection(struct sock *sk) { struct accept_wait_data *ad = sk->sk_user_data; - struct drbd_tconn *tconn = ad->tconn; + void (*state_change)(struct sock *sk); - 
if (sk->sk_state != TCP_ESTABLISHED) - conn_warn(tconn, "unexpected tcp state change. sk_state = %d\n", sk->sk_state); - - write_lock_bh(&sk->sk_callback_lock); - sk->sk_state_change = ad->original_sk_state_change; - sk->sk_user_data = NULL; - write_unlock_bh(&sk->sk_callback_lock); - - sk->sk_state_change(sk); - complete(&ad->door_bell); + state_change = ad->original_sk_state_change; + if (sk->sk_state == TCP_ESTABLISHED) + complete(&ad->door_bell); + state_change(sk); } static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad) @@ -736,7 +730,7 @@ static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_da ad->s_listen = s_listen; write_lock_bh(&s_listen->sk->sk_callback_lock); ad->original_sk_state_change = s_listen->sk->sk_state_change; - s_listen->sk->sk_state_change = incomming_connection; + s_listen->sk->sk_state_change = drbd_incoming_connection; s_listen->sk->sk_user_data = ad; write_unlock_bh(&s_listen->sk->sk_callback_lock); @@ -759,6 +753,14 @@ out: return -EIO; } +static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad) +{ + write_lock_bh(&sk->sk_callback_lock); + sk->sk_state_change = ad->original_sk_state_change; + sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); +} + static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad) { int timeo, connect_int, err = 0; @@ -789,6 +791,9 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct acc } } + if (s_estab) + unregister_state_change(s_estab->sk, ad); + return s_estab; } From 5af2e8ce2b463c2cc8e4a074f9d8f660ae7c1d8f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 14 Aug 2012 11:28:52 +0200 Subject: [PATCH 558/609] drbd: Fix completion of requests while the device is suspended In various places (E.g. CONNECTION_LOST_WHILE_PENDING) the RQ_COMPLETION_SUSP mask is passed in the clear set to mod_rq_state(). 
The issue was that it tried to clear the RQ_COMPLETION_SUSP bit out of the state mask first, and eventually set it afterwards, in the drbd_req_put_completion_ref() function. Fixed that by moving the reference getting out of drbd_req_put_completion_ref() into the mod_rq_state(), before the place where the extra reference might be put. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 266ef24b3c7..5ddb01edd93 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -304,18 +304,6 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_ if (!atomic_sub_and_test(put, &req->completion_ref)) return 0; - if (drbd_suspended(mdev)) { - /* We do not allow completion while suspended. Re-get a - * reference, so whatever happens when this is resumed - * may put and complete.
*/ - - D_ASSERT(!(req->rq_state & RQ_COMPLETION_SUSP)); - req->rq_state |= RQ_COMPLETION_SUSP; - atomic_inc(&req->completion_ref); - return 0; - } - - /* else */ drbd_req_complete(req, m); if (req->rq_state & RQ_POSTPONED) { @@ -338,6 +326,9 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, int c_put = 0; int k_put = 0; + if (drbd_suspended(mdev) && !((s | clear) & RQ_COMPLETION_SUSP)) + set |= RQ_COMPLETION_SUSP; + /* apply */ req->rq_state &= ~clear; @@ -366,6 +357,9 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) atomic_add(req->i.size >> 9, &mdev->ap_in_flight); + if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP)) + atomic_inc(&req->completion_ref); + /* progress: put references */ if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP)) From fef45d297e447d710abcf0cd0bdbf8738ff469eb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 14 Aug 2012 11:46:59 +0200 Subject: [PATCH 559/609] drbd: Write all pages of the bitmap after an online resize We need to write the whole bitmap after we moved the meta data due to an online resize operation. With the support for one peta byte devices bitmap IO was optimized to only write out touched pages. This optimization must be turned off when writing the bitmap after an online resize. This issue was introduced with drbd-8.3.10. The impact of this bug is that after an online resize, the next resync could become larger than expected. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 16 +++++++++++++++- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_nl.c | 4 ++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 33626e34c92..4a076b2553e 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -922,6 +922,7 @@ struct bm_aio_ctx { unsigned flags; #define BM_AIO_COPY_PAGES 1 #define BM_AIO_WRITE_HINTED 2 +#define BM_WRITE_ALL_PAGES 4 int error; struct kref kref; }; @@ -1096,7 +1097,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, &page_private(b->bm_pages[i]))) continue; - if (bm_test_page_unchanged(b->bm_pages[i])) { + + if (!(flags & BM_WRITE_ALL_PAGES) && + bm_test_page_unchanged(b->bm_pages[i])) { dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); continue; } @@ -1180,6 +1183,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) return bm_rw(mdev, WRITE, 0, 0); } +/** + * drbd_bm_write_all() - Write the whole bitmap to its on disk location. + * @mdev: DRBD device. + * + * Will write all pages. + */ +int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0); +} + /** * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. * @mdev: DRBD device. 
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 813f50dbe5c..d8b3c88d6f1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1329,6 +1329,7 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); +extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d1073705bf1..c02d5265c39 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -894,8 +894,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ - err = drbd_bitmap_io(mdev, &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, + "size changed", BM_LOCKED_MASK); if (err) { rv = dev_size_error; goto out; From 39a1aa7f49dc8eae5c8d3a4bf759eb7abeabe6c0 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 8 Aug 2012 21:19:09 +0200 Subject: [PATCH 560/609] drbd: Protect accesses to the uuid set with a spinlock There is at least the worker context, the receiver context, the context of receiving netlink packts. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +++- drivers/block/drbd/drbd_main.c | 33 ++++++++++++++++++++++++------ drivers/block/drbd/drbd_nl.c | 12 ++++++++++- drivers/block/drbd/drbd_receiver.c | 11 +++++++--- 4 files changed, 49 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d8b3c88d6f1..b83398d64a9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -747,6 +747,7 @@ struct drbd_md { u64 md_offset; /* sector offset to 'super' block */ u64 la_size_sect; /* last agreed size, unit sectors */ + spinlock_t uuid_lock; u64 uuid[UI_SIZE]; u64 device_uuid; u32 flags; @@ -1119,8 +1120,9 @@ extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); -extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local); +extern void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local); +extern void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); extern int drbd_md_test_flag(struct drbd_backing_dev *, int); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 328ce80fecf..d831e85f151 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -838,8 +838,10 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); return -EIO; } + spin_lock_irq(&mdev->ldev->md.uuid_lock); for (i = 
UI_CURRENT; i < UI_SIZE; i++) p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; + spin_unlock_irq(&mdev->ldev->md.uuid_lock); mdev->comm_bm_set = drbd_bm_total_weight(mdev); p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); @@ -3015,7 +3017,7 @@ void drbd_md_mark_dirty(struct drbd_conf *mdev) } #endif -static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) +void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; @@ -3023,7 +3025,7 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; } -void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) { if (idx == UI_CURRENT) { if (mdev->state.role == R_PRIMARY) @@ -3038,14 +3040,24 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) drbd_md_mark_dirty(mdev); } +void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +{ + unsigned long flags; + spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); + __drbd_uuid_set(mdev, idx, val); + spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); +} void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) { + unsigned long flags; + spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); if (mdev->ldev->md.uuid[idx]) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; } - _drbd_uuid_set(mdev, idx, val); + __drbd_uuid_set(mdev, idx, val); + spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); } /** @@ -3058,15 +3070,20 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) { u64 val; - unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + unsigned long long bm_uuid; + + get_random_bytes(&val, sizeof(u64)); 
+ + spin_lock_irq(&mdev->ldev->md.uuid_lock); + bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; if (bm_uuid) dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; + __drbd_uuid_set(mdev, UI_CURRENT, val); + spin_unlock_irq(&mdev->ldev->md.uuid_lock); - get_random_bytes(&val, sizeof(u64)); - _drbd_uuid_set(mdev, UI_CURRENT, val); drbd_print_uuids(mdev, "new current UUID"); /* get it to stable storage _now_ */ drbd_md_sync(mdev); @@ -3074,9 +3091,11 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) { + unsigned long flags; if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) return; + spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); if (val == 0) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; @@ -3088,6 +3107,8 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); } + spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); + drbd_md_mark_dirty(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c02d5265c39..d31a0261e83 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1320,6 +1320,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) retcode = ERR_NOMEM; goto fail; } + spin_lock_init(&nbc->md.uuid_lock); + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); if (!new_disk_conf) { retcode = ERR_NOMEM; @@ -2679,8 +2681,16 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, goto nla_put_failure; if (got_ldev) { + int err; + + spin_lock_irq(&mdev->ldev->md.uuid_lock); + err = nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); + spin_unlock_irq(&mdev->ldev->md.uuid_lock); + + if (err) + goto nla_put_failure; + if (nla_put_u32(skb, T_disk_flags, 
mdev->ldev->md.flags) || - nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid) || nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) || nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev))) goto nla_put_failure; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 14df37ccd52..0eefbeb6566 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2796,7 +2796,9 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n"); - drbd_uuid_set_bm(mdev, 0UL); + drbd_uuid_move_history(mdev); + mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; + mdev->ldev->md.uuid[UI_BITMAP] = 0; drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->state.disk >= D_NEGOTIATING ? 
drbd_bm_total_weight(mdev) : 0, 0); @@ -2904,8 +2906,8 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if (mdev->tconn->agreed_pro_version < 91) return -1091; - _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); - _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); + __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); + __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); dev_info(DEV, "Last syncUUID did not get through, corrected:\n"); drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, @@ -2959,11 +2961,14 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol mydisk = mdev->new_state_tmp.disk; dev_info(DEV, "drbd_sync_handshake:\n"); + + spin_lock_irq(&mdev->ldev->md.uuid_lock); drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); hg = drbd_uuid_compare(mdev, &rule_nr); + spin_unlock_irq(&mdev->ldev->md.uuid_lock); dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr); From b66623e33efbbf55717df7bfc49882371118b866 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 8 Aug 2012 21:19:09 +0200 Subject: [PATCH 561/609] drbd: Avoid NetworkFailure state during disconnect Disconnecting is a cluster wide state change. In case the peer node agrees to the state transition, it sends back the fact on the meta-data connection and closes both sockets. In case the node that initiated the state transfer sees the closing action on the data-socket, before the P_STATE_CHG_REPLY packet, it was going into one of the network failure states. At least with the fencing option set to something else than "dont-care", the unclean shutdown of the connection causes a short IO freeze or a fence operation.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 30 +++++++++++++++++++++++++++++- drivers/block/drbd/drbd_state.c | 3 +++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b83398d64a9..37ae87e468a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -816,6 +816,7 @@ enum { * so shrink_page_list() would not recurse into, * and potentially deadlock on, this drbd worker. */ + DISCONNECT_SENT, }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0eefbeb6566..1a8f698021a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -522,7 +522,6 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) conn_err(tconn, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - conn_info(tconn, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -535,9 +534,25 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) set_fs(oldfs); + if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &tconn->flags)) { + long t; + rcu_read_lock(); + t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; + rcu_read_unlock(); + + t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t); + + if (t) + goto out; + } + conn_info(tconn, "sock was shut down by peer\n"); + } + if (rv != size) conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); +out: return rv; } @@ -894,6 +909,7 @@ static int conn_connect(struct drbd_tconn *tconn) .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), }; + clear_bit(DISCONNECT_SENT, &tconn->flags); if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; @@ -5316,6 +5332,18 @@ int 
drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &tconn->flags)) { + long t; + rcu_read_lock(); + t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; + rcu_read_unlock(); + + t = wait_event_timeout(tconn->ping_wait, + tconn->cstate < C_WF_REPORT_PARAMS, + t); + if (t) + break; + } conn_err(tconn, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c16349aec23..4fda4e2024e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1742,6 +1742,9 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v goto abort; } + if (val.conn == C_DISCONNECTING) + set_bit(DISCONNECT_SENT, &tconn->flags); + wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); From 155522df5b8ac24ee66a903e51d5b3023b2a76f9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 8 Aug 2012 21:19:09 +0200 Subject: [PATCH 562/609] drbd: Remove dead code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 36 ++++++------------------------ 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1a8f698021a..4ba530cf810 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -504,37 +504,15 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) oldfs = get_fs(); set_fs(KERNEL_DS); - - for (;;) { - rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags); - if (rv == size) - break; - - /* Note: - * ECONNRESET other side closed the connection - * ERESTARTSYS (on sock) we got a signal - */ - - if (rv < 0) { - if (rv == -ECONNRESET) - conn_info(tconn, "sock was reset by peer\n"); - else if (rv != 
-ERESTARTSYS) - conn_err(tconn, "sock_recvmsg returned %d\n", rv); - break; - } else if (rv == 0) { - break; - } else { - /* signal came in, or peer/link went down, - * after we read a partial message - */ - /* D_ASSERT(signal_pending(current)); */ - break; - } - }; - + rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags); set_fs(oldfs); - if (rv == 0) { + if (rv < 0) { + if (rv == -ECONNRESET) + conn_info(tconn, "sock was reset by peer\n"); + else if (rv != -ERESTARTSYS) + conn_err(tconn, "sock_recvmsg returned %d\n", rv); + } else if (rv == 0) { if (test_bit(DISCONNECT_SENT, &tconn->flags)) { long t; rcu_read_lock(); From 08332d73250eec349b055843a503d45a9b5c13b6 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 17 Aug 2012 15:09:13 +0200 Subject: [PATCH 563/609] drbd: properly call drbd_rs_cancel_all() in drbd_disconnected() drbd_disconnected() is supposed to clear the resync lru cache, by calling drbd_rs_cancel_all(). We must do so before we call drbd_flush_workqueue(), as at least the callback w_restart_disk_io() may wait for resync progress, and would otherwise deadlock. drbd_finish_peer_reqs() may again populate that cache, which will then potentially be stale after the next resync handshake and bitmap exchange, so we have to do it again after that. A stale resync lru cache causes no harm but ugly messages like this: BAD! sector=196608s enr=6 rs_left=-256 rs_failed=0 count=256 cstate=SyncTarget Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4ba530cf810..d5afa0a81fd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4526,6 +4526,10 @@ static int drbd_disconnected(struct drbd_conf *mdev) necessary to reclain net_ee in drbd_finish_peer_reqs().
*/ drbd_flush_workqueue(mdev); + /* need to do it again, drbd_finish_peer_reqs() may have populated it + * again via drbd_try_clear_on_disk_bm(). */ + drbd_rs_cancel_all(mdev); + kfree(mdev->p_uuid); mdev->p_uuid = NULL; From 4eb9b3cba00471a01699cceb0f4b1f0cb8111ee2 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 20 Aug 2012 11:05:23 +0200 Subject: [PATCH 564/609] drbd: don't send out P_BARRIER with stale information We must only send P_BARRIER for epochs we actually sent P_DATA in. If we (re-)establish a connection, we reinitialized the send.current_epoch_nr, but forgot to reset send.current_epoch_writes. This could result in a spurious P_BARRIER with stale epoch information, and a disconnect/reconnect cycle once the then "unexpected" P_BARRIER_ACK is received: BAD! BarrierAck #28823 received, expected #28829! Introduce re_init_if_first_write() and maybe_send_barrier() helpers, and call them appropriately for read/write/set-out-of-sync requests. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 49 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1c9c6fd332c..c674f17773a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1265,6 +1265,27 @@ int w_send_write_hint(struct drbd_work *w, int cancel) return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0); } +static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch) +{ + if (!tconn->send.seen_any_write_yet) { + tconn->send.seen_any_write_yet = true; + tconn->send.current_epoch_nr = epoch; + tconn->send.current_epoch_writes = 0; + } +} + +static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch) +{ + /* re-init if first write on this connection */ + if (!tconn->send.seen_any_write_yet) + return; + if (tconn->send.current_epoch_nr != epoch) { + if 
(tconn->send.current_epoch_writes) + drbd_send_barrier(tconn); + tconn->send.current_epoch_nr = epoch; + } +} + int w_send_out_of_sync(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); @@ -1277,19 +1298,11 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) return 0; } - if (!tconn->send.seen_any_write_yet) { - tconn->send.seen_any_write_yet = true; - tconn->send.current_epoch_nr = req->epoch; - } - if (tconn->send.current_epoch_nr != req->epoch) { - if (tconn->send.current_epoch_writes) - drbd_send_barrier(tconn); - tconn->send.current_epoch_nr = req->epoch; - } /* this time, no tconn->send.current_epoch_writes++; * If it was sent, it was the closing barrier for the last * replicated epoch, before we went into AHEAD mode. * No more barriers will be sent, until we leave AHEAD mode again. */ + maybe_send_barrier(tconn, req->epoch); err = drbd_send_out_of_sync(mdev, req); req_mod(req, OOS_HANDED_TO_NETWORK); @@ -1315,15 +1328,8 @@ int w_send_dblock(struct drbd_work *w, int cancel) return 0; } - if (!tconn->send.seen_any_write_yet) { - tconn->send.seen_any_write_yet = true; - tconn->send.current_epoch_nr = req->epoch; - } - if (tconn->send.current_epoch_nr != req->epoch) { - if (tconn->send.current_epoch_writes) - drbd_send_barrier(tconn); - tconn->send.current_epoch_nr = req->epoch; - } + re_init_if_first_write(tconn, req->epoch); + maybe_send_barrier(tconn, req->epoch); tconn->send.current_epoch_writes++; err = drbd_send_dblock(mdev, req); @@ -1352,12 +1358,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) /* Even read requests may close a write epoch, * if there was any yet. 
*/ - if (tconn->send.seen_any_write_yet && - tconn->send.current_epoch_nr != req->epoch) { - if (tconn->send.current_epoch_writes) - drbd_send_barrier(tconn); - tconn->send.current_epoch_nr = req->epoch; - } + maybe_send_barrier(tconn, req->epoch); err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); From 0ee98e2eb0c85f27b6f24a15d59fb54f99a93840 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 20 Aug 2012 14:54:48 +0200 Subject: [PATCH 565/609] drbd: temporarily suspend io in drbd_adm_disk_opts drbd_adm_disk_opts() does wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); drbd_al_shrink(mdev); If the device is very busy, this can take a very long time to succeed. Fix this by temporarily suspending IO, then quickly change the settings, and resume. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d31a0261e83..363034a77e8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1210,11 +1210,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } } + drbd_suspend_io(mdev); wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); drbd_al_shrink(mdev); err = drbd_check_al_size(mdev, new_disk_conf); lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); + drbd_resume_io(mdev); if (err) { retcode = ERR_NOMEM; From c1fd29a11f433ca8ae37723768016ffe6cdd487b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 21 Aug 2012 20:34:07 +0200 Subject: [PATCH 566/609] drbd: Fix a race condition that can lead to a BUG() If the preconditions for a state change change after the wait_event() we might hit the BUG() statement in conn_set_state(). With holding the spin_lock while evaluating the condition AND until the actual state change we ensure the the preconditions can not change anymore. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 27 +++++++++++++++++++++++++++ drivers/block/drbd/drbd_state.c | 14 ++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 37ae87e468a..1c1576b942b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2301,3 +2301,30 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) } #endif + +/* This is defined in drivers/md/md.h as well. Should go into wait.h */ +#define __wait_event_lock_irq(wq, condition, lock, cmd) \ +do { \ + wait_queue_t __wait; \ + init_waitqueue_entry(&__wait, current); \ + \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ +} while (0) + +#define wait_event_lock_irq(wq, condition, lock, cmd) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, cmd); \ +} while (0) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4fda4e2024e..ce1495187f0 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1710,7 +1710,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) return SS_CW_FAILED_BY_PEER; - spin_lock_irq(&tconn->req_lock); rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; if (rv == SS_UNKNOWN_ERROR) @@ -1719,8 +1718,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. 
*/ - spin_unlock_irq(&tconn->req_lock); - return rv; } @@ -1736,21 +1733,22 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); if (conn_send_state_req(tconn, mask, val)) { clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); - rv = SS_CW_FAILED_BY_PEER; /* if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); */ - goto abort; + mutex_unlock(&tconn->cstate_mutex); + spin_lock_irq(&tconn->req_lock); + return SS_CW_FAILED_BY_PEER; } if (val.conn == C_DISCONNECTING) set_bit(DISCONNECT_SENT, &tconn->flags); - wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); + spin_lock_irq(&tconn->req_lock); + + wait_event_lock_irq(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)), tconn->req_lock,); clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); -abort: mutex_unlock(&tconn->cstate_mutex); - spin_lock_irq(&tconn->req_lock); return rv; } From c02abda2b28734a7e97e15db866507b3cb92b7d0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 22 Aug 2012 16:15:26 +0200 Subject: [PATCH 567/609] drbd: mutex_unlock "... must not be used in interrupt context" Documentation of mutex_unlock says we must not use it in interrupt context. So do not call it while holding the spin_lock_irq, but give up the spinlock temporarily.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 74 +++++++++++++++++---------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ce1495187f0..755425a7a99 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1721,38 +1721,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state return rv; } -static enum drbd_state_rv -conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, - enum chg_state_flags f) -{ - enum drbd_state_rv rv; - - spin_unlock_irq(&tconn->req_lock); - mutex_lock(&tconn->cstate_mutex); - - set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); - if (conn_send_state_req(tconn, mask, val)) { - clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); - /* if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); */ - mutex_unlock(&tconn->cstate_mutex); - spin_lock_irq(&tconn->req_lock); - return SS_CW_FAILED_BY_PEER; - } - - if (val.conn == C_DISCONNECTING) - set_bit(DISCONNECT_SENT, &tconn->flags); - - spin_lock_irq(&tconn->req_lock); - - wait_event_lock_irq(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)), tconn->req_lock,); - clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); - - mutex_unlock(&tconn->cstate_mutex); - - return rv; -} - enum drbd_state_rv _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, enum chg_state_flags flags) @@ -1761,6 +1729,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; union drbd_state ns_max, ns_min, os; + bool have_mutex = false; rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) @@ -1772,7 +1741,35 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING && !(flags & 
(CS_LOCAL_ONLY | CS_HARD))) { - rv = conn_cl_wide(tconn, mask, val, flags); + + /* This will be a cluster-wide state change. + * Need to give up the spinlock, grab the mutex, + * then send the state change request, ... */ + spin_unlock_irq(&tconn->req_lock); + mutex_lock(&tconn->cstate_mutex); + have_mutex = true; + + set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); + if (conn_send_state_req(tconn, mask, val)) { + /* sending failed. */ + clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); + rv = SS_CW_FAILED_BY_PEER; + /* need to re-aquire the spin lock, though */ + goto abort_unlocked; + } + + if (val.conn == C_DISCONNECTING) + set_bit(DISCONNECT_SENT, &tconn->flags); + + /* ... and re-aquire the spinlock. + * If _conn_rq_cond() returned >= SS_SUCCESS, we must call + * conn_set_state() within the same spinlock. */ + spin_lock_irq(&tconn->req_lock); + wait_event_lock_irq(tconn->ping_wait, + (rv = _conn_rq_cond(tconn, mask, val)), + tconn->req_lock, + ); + clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); if (rv < SS_SUCCESS) goto abort; } @@ -1796,9 +1793,16 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ conn_err(tconn, "Could not kmalloc an acscw\n"); } - return rv; abort: - if (flags & CS_VERBOSE) { + if (have_mutex) { + /* mutex_unlock() "... 
must not be used in interrupt context.", + * so give up the spinlock, then re-aquire it */ + spin_unlock_irq(&tconn->req_lock); + abort_unlocked: + mutex_unlock(&tconn->cstate_mutex); + spin_lock_irq(&tconn->req_lock); + } + if (rv < SS_SUCCESS && flags & CS_VERBOSE) { conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv)); conn_err(tconn, " state = { cs:%s }\n", drbd_conn_str(oc)); conn_err(tconn, "wanted = { cs:%s }\n", drbd_conn_str(val.conn)); From bc317a9ecd641b78a4b237cb22b30ecf11443c77 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 22 Aug 2012 11:47:14 +0200 Subject: [PATCH 568/609] drbd: dequeue single work items in wait_for_work() As long as we still use drbd_queue_work_front(), we must only dequeue the single first item during normal operation. The comment in drbd_worker() even says so, but bc8a5a1 drbd: remove struct drbd_tl_epoch objects (barrier works) introduced the batch dequeueing again via list_splice_init() in wait_for_work(). Change back to list_move() of the first item, if any. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c674f17773a..859afdfe5a0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1792,7 +1792,10 @@ void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list) prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE); spin_lock_irq(&connection->req_lock); spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? 
*/ - list_splice_init(&connection->sender_work.q, work_list); + /* dequeue single item only, + * we still use drbd_queue_work_front() in some places */ + if (!list_empty(&connection->sender_work.q)) + list_move(connection->sender_work.q.next, work_list); spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ if (!list_empty(work_list) || signal_pending(current)) { spin_unlock_irq(&connection->req_lock); From b792b655cdf79d4d90b4d46fa37e260ba0296850 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 22 Aug 2012 14:59:06 +0200 Subject: [PATCH 569/609] drbd: fix potential list_add corruption If the md_sync_timer triggers a second time, while the work queued during the first time is still pending, this could result in list_add() of an already added item, and corrupt the work item list. This likely only triggered because of the erroneous batch-dequeueing of work items fixed with drbd: dequeue single work items in wait_for_work() Still, skip queueing if md_sync_work is already queued. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d831e85f151..732053de1db 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3314,7 +3314,9 @@ static void md_sync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; - drbd_queue_work_front(&mdev->tconn->sender_work, &mdev->md_sync_work); + /* must not double-queue! 
*/ + if (list_empty(&mdev->md_sync_work.list)) + drbd_queue_work_front(&mdev->tconn->sender_work, &mdev->md_sync_work); } static int w_md_sync(struct drbd_work *w, int unused) From 88f79ec4ae22a82d7b9a0cd4c9f3ee1def2a382c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 27 Aug 2012 17:16:21 +0200 Subject: [PATCH 570/609] drbd: Fix IO resuming after connection was established while executing the fence handler Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 37 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 755425a7a99..60dde030123 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1204,6 +1204,28 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } } + if (ns.susp_fen) { + struct drbd_tconn *tconn = mdev->tconn; + + spin_lock_irq(&tconn->req_lock); + if (tconn->susp_fen && conn_lowest_conn(tconn) >= C_CONNECTED) { + /* case2: The connection was established again: */ + struct drbd_conf *odev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, odev, vnr) + clear_bit(NEW_CUR_UUID, &odev->flags); + rcu_read_unlock(); + _tl_restart(tconn, RESEND); + _conn_request_state(tconn, + (union drbd_state) { { .susp_fen = 1 } }, + (union drbd_state) { { .susp_fen = 0 } }, + CS_VERBOSE); + } + spin_unlock_irq(&tconn->req_lock); + } + /* Became sync source. With protocol >= 96, we still need to send out * the sync uuid now. 
Need to do that before any drbd_send_state, or * the other side may go "paused sync" before receiving the sync uuids, @@ -1475,7 +1497,6 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) struct drbd_tconn *tconn = w->tconn; enum drbd_conns oc = acscw->oc; union drbd_state ns_max = acscw->ns_max; - union drbd_state ns_min = acscw->ns_min; struct drbd_conf *mdev; int vnr; @@ -1519,20 +1540,6 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) CS_VERBOSE); spin_unlock_irq(&tconn->req_lock); } - /* case2: The connection was established again: */ - if (ns_min.conn >= C_CONNECTED) { - rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - clear_bit(NEW_CUR_UUID, &mdev->flags); - rcu_read_unlock(); - spin_lock_irq(&tconn->req_lock); - _tl_restart(tconn, RESEND); - _conn_request_state(tconn, - (union drbd_state) { { .susp_fen = 1 } }, - (union drbd_state) { { .susp_fen = 0 } }, - CS_VERBOSE); - spin_unlock_irq(&tconn->req_lock); - } } kref_put(&tconn->kref, &conn_destroy); return 0; From 892fdd1aee35803bfa41ccf4df850ddb70f0811c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 27 Aug 2012 17:20:12 +0200 Subject: [PATCH 571/609] drbd: Improve readability of IO resuming after freeze due to no data access The previous way of doing the state change was also okay since the state change on the susp flag gets propagated from the mdev to the tconn. Fortunately all this goes away in drbd-9.0 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 60dde030123..f3e6090d462 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1187,21 +1187,25 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, state change. 
This function might sleep */ if (ns.susp_nod) { + struct drbd_tconn *tconn = mdev->tconn; enum drbd_req_event what = NOTHING; - if (os.conn < C_CONNECTED && conn_lowest_conn(mdev->tconn) >= C_CONNECTED) + spin_lock_irq(&tconn->req_lock); + if (os.conn < C_CONNECTED && conn_lowest_conn(tconn) >= C_CONNECTED) what = RESEND; if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && - conn_lowest_disk(mdev->tconn) > D_NEGOTIATING) + conn_lowest_disk(tconn) > D_NEGOTIATING) what = RESTART_FROZEN_DISK_IO; - if (what != NOTHING) { - spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev->tconn, what); - _drbd_set_state(_NS(mdev, susp_nod, 0), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); + if (tconn->susp_nod && what != NOTHING) { + _tl_restart(tconn, what); + _conn_request_state(tconn, + (union drbd_state) { { .susp_nod = 1 } }, + (union drbd_state) { { .susp_nod = 0 } }, + CS_VERBOSE); } + spin_unlock_irq(&tconn->req_lock); } if (ns.susp_fen) { From 07fc96197aec46f7ad4f08a4b2a1ea426112e24d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 28 Aug 2012 11:07:56 +0200 Subject: [PATCH 572/609] drbd: Do not check aspects that are not subject to change in _conn_requests_state() When _conn_requests_state() is used to change other parts of the state than the connection, do not check for a valid connection transition. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index f3e6090d462..9ae40c96c1b 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1742,9 +1742,11 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ union drbd_state ns_max, ns_min, os; bool have_mutex = false; - rv = is_valid_conn_transition(oc, val.conn); - if (rv < SS_SUCCESS) - goto abort; + if (mask.conn) { + rv = is_valid_conn_transition(oc, val.conn); + if (rv < SS_SUCCESS) + goto abort; + } rv = conn_is_valid_transition(tconn, mask, val, flags); if (rv < SS_SUCCESS) From 797020117761eee21ef284cea90c51c690fca169 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 28 Aug 2012 11:33:35 +0200 Subject: [PATCH 573/609] drbd: Fix the way the STATE_SENT bit is cleared With merging the commit 'drbd: Delay/reject other state changes while establishing a connection' the condition check for clearing the flag was wrong. Move the bit clearing to the __drbd_set_state() function in order to have it already cleared for the other parts of the function. I.e. clearing the susp_fen in the after_state_ch() function. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 9ae40c96c1b..a16278cde3d 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -186,6 +186,24 @@ enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn) return conn; } +static bool no_peer_wf_report_params(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + bool rv = true; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) + if (mdev->state.conn == C_WF_REPORT_PARAMS) { + rv = false; + break; + } + rcu_read_unlock(); + + return rv; +} + + /** * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. @@ -971,6 +989,11 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) drbd_print_uuids(mdev, "attached to UUIDs"); + /* Wake up role changes, that were delayed because of connection establishing */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && + no_peer_wf_report_params(mdev->tconn)) + clear_bit(STATE_SENT, &mdev->tconn->flags); + wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); wake_up(&mdev->tconn->ping_wait); @@ -1457,12 +1480,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, && verify_can_do_stop_sector(mdev)) drbd_send_state(mdev, ns); - /* Wake up role changes, that were delayed because of connection establishing */ - if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { - if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) - wake_up(&mdev->state_wait); - } - /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk * failure, or because of connection loss. 
From 4ae98b4db3d0e9336d0b771636b7d053a1460805 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 28 Aug 2012 11:46:22 +0200 Subject: [PATCH 574/609] drbd: Improve the error reporting of failed conn state changes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a16278cde3d..c3ec578918f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1834,8 +1834,8 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ } if (rv < SS_SUCCESS && flags & CS_VERBOSE) { conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv)); - conn_err(tconn, " state = { cs:%s }\n", drbd_conn_str(oc)); - conn_err(tconn, "wanted = { cs:%s }\n", drbd_conn_str(val.conn)); + conn_err(tconn, " mask = 0x%x val = 0x%x\n", mask.i, val.i); + conn_err(tconn, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn)); } return rv; } From d76440181d0e05826f228189b74b4dbf64b68981 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 28 Aug 2012 14:39:44 +0200 Subject: [PATCH 575/609] drbd: Fix postponed requests * Postponed requests should not set or clear out-of-sync marks * When a request gets postponed we need to drop its reference on mdev->local_cnt (put_ldev()). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5ddb01edd93..57cbef2ecee 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -123,11 +123,13 @@ void drbd_req_destroy(struct kref *kref) * (local only or remote failed).
* Other places where we set out-of-sync: * READ with local io-error */ - if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); + if (!(s & RQ_POSTPONED)) { + if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) + drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); - if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - drbd_set_in_sync(mdev, req->i.sector, req->i.size); + if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) + drbd_set_in_sync(mdev, req->i.sector, req->i.size); + } /* one might be tempted to move the drbd_al_complete_io * to the local io completion callback drbd_request_endio. @@ -1046,6 +1048,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long if (req->private_bio) { bio_put(req->private_bio); req->private_bio = NULL; + put_ldev(mdev); } goto out; } From 19fffd7b0303e8843aa2decfd43fa57c9d511409 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 28 Aug 2012 16:48:03 +0200 Subject: [PATCH 576/609] drbd: Call drbd_md_sync() explicitly after a state change on the connection Without this, the meta-data gets updated after 5 seconds by the md_sync_timer. Better to do it immediately after a state change. If the asender detects a network failure, it may take a bit until the worker processes the corresponding after-conn-state-change work item. The worker might be blocked in sending something, i.e. it takes until it gets into its timeout. That is 6 seconds by default which is longer than the 5 seconds of the md_sync_timer.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 16 ++++++++++++++++ drivers/block/drbd/drbd_nl.c | 16 ---------------- drivers/block/drbd/drbd_receiver.c | 1 + drivers/block/drbd/drbd_state.c | 3 +++ 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1c1576b942b..f223f01b4e1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1116,6 +1116,7 @@ extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); void drbd_print_uuids(struct drbd_conf *mdev, const char *text); +extern void conn_md_sync(struct drbd_tconn *tconn); extern void drbd_md_sync(struct drbd_conf *mdev); extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 732053de1db..5e5a6abb281 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2820,6 +2820,22 @@ void drbd_free_sock(struct drbd_tconn *tconn) /* meta data management */ +void conn_md_sync(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_md_sync(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); +} + struct meta_data_on_disk { u64 la_size; /* last agreed size. */ u64 uuid[UI_SIZE]; /* UUIDs. 
*/ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 363034a77e8..476491ffdab 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -363,22 +363,6 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) return ret; } -static void conn_md_sync(struct drbd_tconn *tconn) -{ - struct drbd_conf *mdev; - int vnr; - - rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - kref_get(&mdev->kref); - rcu_read_unlock(); - drbd_md_sync(mdev); - kref_put(&mdev->kref, &drbd_minor_destroy); - rcu_read_lock(); - } - rcu_read_unlock(); -} - int conn_khelper(struct drbd_tconn *tconn, char *cmd) { char *envp[] = { "HOME=/", diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d5afa0a81fd..fed34a7bef4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5390,6 +5390,7 @@ int drbd_asender(struct drbd_thread *thi) if (0) { reconnect: conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); + conn_md_sync(tconn); } if (0) { disconnect: diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c3ec578918f..84512ec1917 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1563,6 +1563,9 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) } } kref_put(&tconn->kref, &conn_destroy); + + conn_md_sync(tconn); + return 0; } From 76590cd1fc338fd1c50f7121636db421deb8b881 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 29 Aug 2012 15:23:14 +0200 Subject: [PATCH 577/609] drbd: Fix postponed requests A postponed request might has RQ_IN_ACT_LOG already set, but is POSTPONED before it gets something in the RQ_LOCAL_MASK set. Up to now this caused a left-over active extent. 
Fix that by only testing for the RQ_IN_ACT_LOG bit in drbd_req_destroy() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 57cbef2ecee..d1d17fcd923 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -141,10 +141,9 @@ void drbd_req_destroy(struct kref *kref) * but after the extent has been dropped from the al, * we would forget to resync the corresponding extent. */ - if (s & RQ_LOCAL_MASK) { + if (s & RQ_IN_ACT_LOG) { if (get_ldev_if_state(mdev, D_FAILED)) { - if (s & RQ_IN_ACT_LOG) - drbd_al_complete_io(mdev, &req->i); + drbd_al_complete_io(mdev, &req->i); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), " From 70f17b6bd18dfe33f40db7573baa663b866be6ba Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 3 Sep 2012 14:08:35 +0200 Subject: [PATCH 578/609] drbd: differentiate early and later "postponing" of requests We use the RQ_POSTPONED flag to mark a request for several reasons. It may be a conflicting request in a dual-primary setup, where conflict detection and resolution on the peer decided that this request needs to be re-submitted, it needs to re-enter drbd_make_request() to fix the data divergence caused by these conflicting, partially overlapping, quasi-simultaneous requests. In this case we need to mark the corresponding area as out-of-sync, before we call drbd_al_complete_io(). We also use the RQ_POSTPONED flag to just "push back" a request, before even processing it, if IO is suspended for some reason. In this case, as this request was neither submitted nor sent yet, we must not touch the bitmap. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d1d17fcd923..e307890e6af 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -123,7 +123,14 @@ void drbd_req_destroy(struct kref *kref) * (local only or remote failed). * Other places where we set out-of-sync: * READ with local io-error */ - if (!(s & RQ_POSTPONED)) { + + /* There is a special case: + * we may notice late that IO was suspended, + * and postpone, or schedule for retry, a write, + * before it even was submitted or sent. + * In that case we do not want to touch the bitmap at all. + */ + if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) { if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); From 1393b59f8c46001c8dbd47078881483cf97813c3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 3 Sep 2012 14:04:23 +0200 Subject: [PATCH 579/609] drbd: Remove duplicate code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fed34a7bef4..813759f1b6a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -490,22 +490,9 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) { - mm_segment_t oldfs; - struct kvec iov = { - .iov_base = buf, - .iov_len = size, - }; - struct msghdr msg = { - .msg_iovlen = 1, - .msg_iov = (struct iovec *)&iov, - .msg_flags = MSG_WAITALL | MSG_NOSIGNAL - }; int rv; - oldfs = get_fs(); - set_fs(KERNEL_DS); - rv = sock_recvmsg(tconn->data.socket, &msg, size, 
msg.msg_flags); - set_fs(oldfs); + rv = drbd_recv_short(tconn->data.socket, buf, size, 0); if (rv < 0) { if (rv == -ECONNRESET) From a3025a273700fc51dd561c7b2941f3c9db9be90a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 3 Sep 2012 15:39:01 +0200 Subject: [PATCH 580/609] drbd: Fix comparison of is_valid_transition()'s return code is_valid_transition() might return SS_NOTHING_TO_DO. The condition function _req_st_cond() returned SS_NOTHING_TO_DO, which caused the wait_event to abort too early. Therefore drbd_req_state() did not consume the next CL_ST_CHG_SUCCESS or SS_CW_FAILED_BY_PEER causing severe disruption of the state machine logic... Detaching from a single volume was one way to trigger this bug. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 84512ec1917..69ef35266ba 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -277,16 +277,16 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, os = drbd_read_state(mdev); ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); - if (rv == SS_SUCCESS) + if (rv >= SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ if (!cl_wide_st_chg(mdev, os, ns)) rv = SS_CW_NO_NEED; if (rv == SS_UNKNOWN_ERROR) { rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) { + if (rv >= SS_SUCCESS) { rv = is_valid_soft_transition(os, ns, mdev->tconn); - if (rv == SS_SUCCESS) + if (rv >= SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail.
*/ } } From 1b6dd252e6c631322372c018ed546a108d9869d3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 4 Sep 2012 15:16:20 +0200 Subject: [PATCH 581/609] drbd: panic on delayed completion of aborted requests "aborting" requests, or force-detaching the disk, is intended for completely blocked/hung local backing devices which no longer complete requests at all, not even error completions. In this situation, usually a hard-reset and failover is the only way out. By "aborting", basically faking a local error-completion, we allow for a more graceful switchover by cleanly migrating services. Still the affected node has to be rebooted "soon". By completing these requests, we allow the upper layers to re-use the associated data pages. If later the local backing device "recovers", and now DMAs some data from disk into the original request pages, in the best case it will just put random data into unused pages; but typically it will corrupt meanwhile completely unrelated data, causing all sorts of damage. Which means delayed successful completion, especially for READ requests, is a reason to panic(). We assume that a delayed *error* completion is OK, though we still will complain noisily about it. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 859afdfe5a0..eeda8b8e9d8 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -221,6 +221,43 @@ void drbd_request_endio(struct bio *bio, int error) error = -EIO; } + + /* If this request was aborted locally before, + * but now was completed "successfully", + * chances are that this caused arbitrary data corruption.
+ * + * "aborting" requests, or force-detaching the disk, is intended for + * completely blocked/hung local backing devices which do no longer + * complete requests at all, not even do error completions. In this + * situation, usually a hard-reset and failover is the only way out. + * + * By "aborting", basically faking a local error-completion, + * we allow for a more graceful swichover by cleanly migrating services. + * Still the affected node has to be rebooted "soon". + * + * By completing these requests, we allow the upper layers to re-use + * the associated data pages. + * + * If later the local backing device "recovers", and now DMAs some data + * from disk into the original request pages, in the best case it will + * just put random data into unused pages; but typically it will corrupt + * meanwhile completely unrelated data, causing all sorts of damage. + * + * Which means delayed successful completion, + * especially for READ requests, + * is a reason to panic(). + * + * We assume that a delayed *error* completion is OK, + * though we still will complain noisily about it. + */ + if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { + if (__ratelimit(&drbd_ratelimit_state)) + dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); + + if (!error) + panic("possible random memory corruption caused by delayed completion of aborted local request\n"); + } + /* to avoid recursion in __req_mod */ if (unlikely(error)) { what = (bio_data_dir(bio) == WRITE) From a506c13a4d1ec5e1f2f9bc0123dacb5d123004d3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 26 Sep 2012 14:16:30 +0200 Subject: [PATCH 582/609] drbd: use list_move_tail instead of list_del/list_add_tail Using list_move_tail() instead of list_del() + list_add_tail(). spatch with a semantic match is used to find this problem.
(http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index eeda8b8e9d8..377f27bbba1 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -134,8 +134,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel spin_lock_irqsave(&mdev->tconn->req_lock, flags); mdev->writ_cnt += peer_req->i.size >> 9; - list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */ - list_add_tail(&peer_req->w.list, &mdev->done_ee); + list_move_tail(&peer_req->w.list, &mdev->done_ee); /* * Do not remove from the write_requests tree here: we did not send the From bc891c9ae3fb2848922e0f0da22fd7de0d58dc1b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 26 Sep 2012 14:18:51 +0200 Subject: [PATCH 583/609] drbd: fix potential deadlock during bitmap (re-)allocation The former comment arguing that GFP_KERNEL was good enough was wrong: it did not take resize into account at all, and assumed the only path leading here was the normal attach on a still secondary device, so no deadlock would be possible. Both resize on a Primary, or attach on a diskless Primary, could potentially deadlock. drbd_bm_resize() is called while IO to the respective device is suspended, so we must use GFP_NOIO to avoid potential deadlock. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4a076b2553e..e502535d2c4 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -388,14 +388,16 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) return old_pages; /* Trying kmalloc first, falling back to vmalloc. - * GFP_KERNEL is ok, as this is done when a lower level disk is - * "attached" to the drbd. Context is receiver thread or drbdsetup / - * netlink process. As we have no disk yet, we are not in the IO path, - * not even the IO path of the peer. */ + * GFP_NOIO, as this is called while drbd IO is "suspended", + * and during resize or attach on diskless Primary, + * we must not block on IO to ourselves. + * Context is receiver thread or dmsetup. */ bytes = sizeof(struct page *)*want; - new_pages = kmalloc(bytes, GFP_KERNEL); + new_pages = kmalloc(bytes, GFP_NOIO); if (!new_pages) { - new_pages = vmalloc(bytes); + new_pages = __vmalloc(bytes, + GFP_NOIO | __GFP_HIGHMEM, + PAGE_KERNEL); if (!new_pages) return NULL; vmalloced = 1; @@ -406,7 +408,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) for (i = 0; i < have; i++) new_pages[i] = old_pages[i]; for (; i < want; i++) { - page = alloc_page(GFP_HIGHUSER); + page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); if (!page) { bm_free_pages(new_pages + have, i - have); bm_vk_free(new_pages, vmalloced); From 8747d30af97232f9ff4cde78b8d259cc715a9b7a Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 26 Sep 2012 14:22:40 +0200 Subject: [PATCH 584/609] drbd: a few more GFP_KERNEL -> GFP_NOIO This has not yet been observed, but conceivably, when using GFP_KERNEL allocations from drbd_md_sync(), drbd_flush_after_epoch() or receive_SyncParam(), we could trigger 
additional IO to our own device, or another device in a criss-cross setup, and end up in a local deadlock, or potentially a distributed deadlock in a criss-cross setup involving the peer blocked in a similar way waiting for us to make progress. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f223f01b4e1..4aadd081817 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2294,7 +2294,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) if (test_bit(MD_NO_FUA, &mdev->flags)) return; - r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL); + r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_NOIO, NULL); if (r) { set_bit(MD_NO_FUA, &mdev->flags); dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 377f27bbba1..d348260301f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -473,7 +473,7 @@ struct fifo_buffer *fifo_alloc(int fifo_size) { struct fifo_buffer *fb; - fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL); + fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO); if (!fb) return NULL; From e34b677d09ce375a87acd0360537cbed33881b0c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Sep 2012 15:07:11 +0200 Subject: [PATCH 585/609] drbd: wait for meta data IO completion even with failed disk, unless force-detached The intention of force-detach is to be able to deal with a completely unresponsive lower level IO stack, which does not even deliver error completions anymore, but no completion at all. In all other cases, we must still wait for the meta data IO completion.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 17 +++++++---------- drivers/block/drbd/drbd_bitmap.c | 8 ++++---- drivers/block/drbd/drbd_int.h | 4 ++-- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e81085795ec..bc6284ef21d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -123,13 +123,7 @@ void drbd_md_put_buffer(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); } -static bool md_io_allowed(struct drbd_conf *mdev) -{ - enum drbd_disk_state ds = mdev->state.disk; - return ds >= D_NEGOTIATING || ds == D_ATTACHING; -} - -void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, unsigned int *done) { long dt; @@ -141,9 +135,12 @@ void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing if (dt == 0) dt = MAX_SCHEDULE_TIMEOUT; - dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); - if (dt == 0) + dt = wait_event_timeout(mdev->misc_wait, + *done || test_bit(FORCE_DETACH, &mdev->flags), dt); + if (dt == 0) { dev_err(DEV, "meta-data IO operation timed out\n"); + drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH); + } } static int _drbd_md_sync_page_io(struct drbd_conf *mdev, @@ -183,7 +180,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); + wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) err = mdev->md_io.error; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e502535d2c4..e30ff720894 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1128,7 +1128,7 @@ static int 
bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * "in_flight reached zero, all done" event. */ if (!atomic_dec_and_test(&ctx->in_flight)) - wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); else kref_put(&ctx->kref, &bm_aio_ctx_destroy); @@ -1145,7 +1145,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w } if (atomic_read(&ctx->in_flight)) - err = -EIO; /* Disk failed during IO... */ + err = -EIO; /* Disk timeout/force-detach during IO... */ now = jiffies; if (rw == WRITE) { @@ -1273,11 +1273,11 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc } bm_page_io_async(ctx, idx, WRITE_SYNC); - wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); if (ctx->error) drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); - /* that should force detach, so the in memory bitmap will be + /* that causes us to detach, so the in memory bitmap will be * gone in a moment as well. 
*/ mdev->bm_writ_cnt++; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4aadd081817..eeab868f056 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1452,8 +1452,8 @@ extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); -extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - unsigned int *done); +extern void wait_until_done_or_force_detached(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) From edc9f5eb7afa3d832f540fcfe10e3e1087e6f527 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Sep 2012 15:18:21 +0200 Subject: [PATCH 586/609] drbd: always write bitmap on detach If we detach due to local read-error (which sets a bit in the bitmap), stay Primary, and then re-attach (which re-reads the bitmap from disk), we potentially lost the "out-of-sync" (or, "bad block") information in the bitmap. Always (try to) write out the changed bitmap pages before going diskless. That way, we don't lose the bit for the bad block, the next resync will fetch it from the peer, and rewrite it locally, which may result in block reallocation in some lower layer (or the hardware), and thereby "heal" the bad blocks. If the bitmap writeout errors out as well, we will (again: try to) mark the "we need a full sync" bit in our super block, if it was a READ error; writes are covered by the activity log already. If that superblock does not make it to disk either, we are sorry. 
Maybe we just lost an entire disk or controller (or iSCSI connection), and there actually are no bad blocks at all, so we don't need to re-fetch from the peer, there is no "auto-healing" necessary. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 39 +++++++++++++++++++++++++++----- drivers/block/drbd/drbd_main.c | 20 ++++++++++++++++ drivers/block/drbd/drbd_nl.c | 2 ++ drivers/block/drbd/drbd_req.c | 9 +++++--- drivers/block/drbd/drbd_worker.c | 4 ++-- 5 files changed, 63 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eeab868f056..32a9ab67a5f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -682,7 +682,8 @@ enum { once no more io in flight, start bitmap io */ BITMAP_IO_QUEUED, /* Started bitmap IO */ GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ - WAS_IO_ERROR, /* Local disk failed returned IO error */ + WAS_IO_ERROR, /* Local disk failed, returned IO error */ + WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. 
*/ RESIZE_PENDING, /* Size change detected locally, waiting for the response from @@ -1142,6 +1143,9 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why, enum bm_flag flags); +extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags); extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_go_diskless(struct drbd_conf *mdev); @@ -1661,14 +1665,15 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) } enum drbd_force_detach_flags { - DRBD_IO_ERROR, + DRBD_READ_ERROR, + DRBD_WRITE_ERROR, DRBD_META_IO_ERROR, DRBD_FORCE_DETACH, }; #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, - enum drbd_force_detach_flags forcedetach, + enum drbd_force_detach_flags df, const char *where) { enum drbd_io_error_p ep; @@ -1678,18 +1683,40 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, rcu_read_unlock(); switch (ep) { case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ - if (forcedetach == DRBD_IO_ERROR) { + if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); if (mdev->state.disk > D_INCONSISTENT) _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); break; } - /* NOTE fall through to detach case if forcedetach set */ + /* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */ case EP_DETACH: case EP_CALL_HELPER: + /* Remember whether we saw a READ or WRITE error. + * + * Recovery of the affected area for WRITE failure is covered + * by the activity log. + * READ errors may fall outside that area though. 
Certain READ + * errors can be "healed" by writing good data to the affected + * blocks, which triggers block re-allocation in lower layers. + * + * If we can not write the bitmap after a READ error, + * we may need to trigger a full sync (see w_go_diskless()). + * + * Force-detach is not really an IO error, but rather a + * desperate measure to try to deal with a completely + * unresponsive lower level IO stack. + * Still it should be treated as a WRITE error. + * + * Meta IO error is always WRITE error: + * we read meta data only once during attach, + * which will fail in case of errors. + */ set_bit(WAS_IO_ERROR, &mdev->flags); - if (forcedetach == DRBD_FORCE_DETACH) + if (df == DRBD_READ_ERROR) + set_bit(WAS_READ_ERROR, &mdev->flags); + if (df == DRBD_FORCE_DETACH) set_bit(FORCE_DETACH, &mdev->flags); if (mdev->state.disk > D_FAILED) { _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5e5a6abb281..0f73e157dee 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3226,6 +3226,26 @@ static int w_go_diskless(struct drbd_work *w, int unused) * inc/dec it frequently. Once we are D_DISKLESS, no one will touch * the protected members anymore, though, so once put_ldev reaches zero * again, it will be safe to free them. */ + + /* Try to write changed bitmap pages, read errors may have just + * set some bits outside the area covered by the activity log. + * + * If we have an IO error during the bitmap writeout, + * we will want a full sync next time, just in case. + * (Do we want a specific meta data flag for this?) + * + * If that does not make it to stable storage either, + * we cannot do anything about that anymore. 
*/ + if (mdev->bitmap) { + if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write, + "detach", BM_LOCKED_MASK)) { + if (test_bit(WAS_READ_ERROR, &mdev->flags)) { + drbd_md_set_flag(mdev, MDF_FULL_SYNC); + drbd_md_sync(mdev); + } + } + } + drbd_force_state(mdev, NS(disk, D_DISKLESS)); return 0; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 476491ffdab..52258867222 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1294,6 +1294,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* make sure there is no leftover from previous force-detach attempts */ clear_bit(FORCE_DETACH, &mdev->flags); + clear_bit(WAS_IO_ERROR, &mdev->flags); + clear_bit(WAS_READ_ERROR, &mdev->flags); /* and no leftover from previously aborted resync or verify, either */ mdev->rs_total = 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e307890e6af..97a9e69dd23 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -492,11 +492,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED); break; + case WRITE_COMPLETED_WITH_ERROR: + __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); + mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); + break; + case READ_COMPLETED_WITH_ERROR: drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); - /* fall through. */ - case WRITE_COMPLETED_WITH_ERROR: - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_READ_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA, no __drbd_chk_io_error in that case. 
*/ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d348260301f..64a7305c678 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -106,7 +106,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_READ_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); @@ -147,7 +147,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (block_id == ID_SYNCER) From 42839f65361baa0fa62494c32f1dae570e9dce19 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Sep 2012 15:19:38 +0200 Subject: [PATCH 587/609] drbd: log request sector offset and size for IO errors Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 97a9e69dd23..b905a0453bf 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -425,6 +425,20 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, kref_sub(&req->kref, k_put, drbd_req_destroy); } +static void drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *req) +{ + char b[BDEVNAME_SIZE]; + + if (!__ratelimit(&drbd_ratelimit_state)) + return; + + dev_warn(DEV, "local %s IO error sector %llu+%u on %s\n", + (req->rq_state & RQ_WRITE) ? 
"WRITE" : "READ", + (unsigned long long)req->i.sector, + req->i.size >> 9, + bdevname(mdev->ldev->backing_bdev, b)); +} + /* obviously this could be coded as many single functions * instead of one huge switch, * or by putting the code directly in the respective locations @@ -493,12 +507,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case WRITE_COMPLETED_WITH_ERROR: + drbd_report_io_error(mdev, req); __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); break; case READ_COMPLETED_WITH_ERROR: drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); + drbd_report_io_error(mdev, req); __drbd_chk_io_error(mdev, DRBD_READ_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: @@ -1108,7 +1124,8 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long } else if (no_remote) { nodata: if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + dev_err(DEV, "IO ERROR: neither local nor remote data, sector %llu+%u\n", + (unsigned long long)req->i.sector, req->i.size >> 9); /* A write may have been queued for send_oos, however. 
* So we can not simply free it, we must go through drbd_req_put_completion_ref() */ } From 4035e4c2ebeb6ac1acbf4e5dd8a759fcf93ff8da Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 1 Oct 2012 18:04:12 +0200 Subject: [PATCH 588/609] drbd: Fix clearing of MDF_AL_DISABLED Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 52258867222..298dd3e35e0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1226,7 +1226,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); if (new_disk_conf->al_updates) - mdev->ldev->md.flags &= MDF_AL_DISABLED; + mdev->ldev->md.flags &= ~MDF_AL_DISABLED; else mdev->ldev->md.flags |= MDF_AL_DISABLED; @@ -1615,7 +1615,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) flags. */ if (rcu_dereference(mdev->ldev->disk_conf)->al_updates) - mdev->ldev->md.flags &= MDF_AL_DISABLED; + mdev->ldev->md.flags &= ~MDF_AL_DISABLED; else mdev->ldev->md.flags |= MDF_AL_DISABLED; From fd0017c1243b2b9058525672b9ea13272fb51b30 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 19 Oct 2012 14:19:23 +0200 Subject: [PATCH 589/609] drbd: fix regression: potential NULL pointer dereference recent commit drbd: always write bitmap on detach introduced a bitmap writeout during detach, which obviously needs some meta data device to write to. Unfortunately, that same error path may be taken if we fail to attach, e.g. due to UUID mismatch, after we changed state to D_ATTACHING, but before the lower level device pointer is even assigned. We need to test for presence of mdev->ldev. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +++++ drivers/block/drbd/drbd_main.c | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 32a9ab67a5f..057ffed6eb7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2318,6 +2318,11 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) { int r; + if (mdev->ldev == NULL) { + dev_warn(DEV, "mdev->ldev == NULL in drbd_md_flush\n"); + return; + } + if (test_bit(MD_NO_FUA, &mdev->flags)) return; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0f73e157dee..be4f5827712 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3235,8 +3235,12 @@ static int w_go_diskless(struct drbd_work *w, int unused) * (Do we want a specific meta data flag for this?) * * If that does not make it to stable storage either, - * we cannot do anything about that anymore. */ - if (mdev->bitmap) { + * we cannot do anything about that anymore. + * + * We still need to check if both bitmap and ldev are present, we may + * end up here after a failed attach, before ldev was even assigned. + */ + if (mdev->bitmap && mdev->ldev) { if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write, "detach", BM_LOCKED_MASK)) { if (test_bit(WAS_READ_ERROR, &mdev->flags)) { From 518a4d53b2985451a08cb4e5b79deacfe151a38a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 19 Oct 2012 14:21:22 +0200 Subject: [PATCH 590/609] drbd: don't try to clear bits once the disk has failed If the disk has failed already, there is no point trying to change the bitmap. drbd_set_out_of_sync() already had this safeguard, time to add it to drbd_set_in_sync() as well. 
This also prevents some warning messages, like FIXME asender in bm_change_bits_to, bitmap locked for 'detach' by worker if our disk fails during resync, while there are some resync acks queued up. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index bc6284ef21d..92510f8ad01 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -695,11 +695,15 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, (unsigned long long)sector, size); return; } + + if (!get_ldev(mdev)) + return; /* no disk, no metadata, no bitmap to clear bits in */ + nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; if (!expect(sector < nr_sectors)) - return; + goto out; if (!expect(esector < nr_sectors)) esector = nr_sectors - 1; @@ -709,7 +713,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, * round up start sector, round down end sector. we make sure we only * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */ if (unlikely(esector < BM_SECT_PER_BIT-1)) - return; + goto out; if (unlikely(esector == (nr_sectors-1))) ebnr = lbnr; else @@ -717,14 +721,14 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); if (sbnr > ebnr) - return; + goto out; /* * ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. 
*/ count = drbd_bm_clear_bits(mdev, sbnr, ebnr); - if (count && get_ldev(mdev)) { + if (count) { drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev)); spin_lock_irqsave(&mdev->al_lock, flags); drbd_try_clear_on_disk_bm(mdev, sector, count, true); @@ -733,8 +737,9 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, /* just wake_up unconditional now, various lc_chaged(), * lc_put() in drbd_try_clear_on_disk_bm(). */ wake_up = 1; - put_ldev(mdev); } +out: + put_ldev(mdev); if (wake_up) wake_up(&mdev->al_wait); } From 328e0f125bf41f4f33f684db22015f92cb44fe56 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 19 Oct 2012 14:37:47 +0200 Subject: [PATCH 591/609] drbd: Broadcast sync progress no more often than once per second Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_nl.c | 6 ++++++ drivers/block/drbd/drbd_worker.c | 4 ++++ include/linux/drbd.h | 4 ++-- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 057ffed6eb7..784f4eb2ed6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -965,6 +965,7 @@ struct drbd_conf { unsigned long rs_mark_time[DRBD_SYNC_MARKS]; /* current index into rs_mark_{left,time} */ int rs_last_mark; + unsigned long rs_last_bcast; /* [unit jiffies] */ /* where does the admin want us to start? 
(sector) */ sector_t ov_start_sector; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 298dd3e35e0..d339a2754a8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3295,6 +3295,12 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) unsigned seq; int err = -ENOMEM; + if (sib->sib_reason == SIB_SYNC_PROGRESS && + time_after(jiffies, mdev->rs_last_bcast + HZ)) + mdev->rs_last_bcast = jiffies; + else + return; + seq = atomic_inc_return(&drbd_genl_seq); msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); if (!msg) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 64a7305c678..424dc7bdf9b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1696,6 +1696,10 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) write_unlock_irq(&global_state_lock); if (r == SS_SUCCESS) { + /* reset rs_last_bcast when a resync or verify is started, + * to deal with potential jiffies wrap. 
*/ + mdev->rs_last_bcast = jiffies - HZ; + dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", drbd_conn_str(ns.conn), (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0b93e5e2e06..0c5a18ec322 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,8 +52,8 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.11" -#define API_VERSION 88 +#define REL_VERSION "8.4.2" +#define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 From 063eacf88cc1394ece125d106c05cba1ca03aa3d Mon Sep 17 00:00:00 2001 From: Jing Wang Date: Thu, 25 Oct 2012 15:00:56 +0800 Subject: [PATCH 592/609] drbd: check return of kmalloc in receive_uuids Signed-off-by: Jing Wang Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0331ad0b61e..9a9d4fd264a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3663,6 +3663,10 @@ static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi) return config_unknown_volume(tconn, pi); p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); + if (!p_uuid) { + dev_err(DEV, "kmalloc of p_uuid failed\n"); + return false; + } for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) p_uuid[i] = be64_to_cpu(p->uuid[i]); From ed635cb0674d6e4303d1a2e27d9e6e80b451a338 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 5 Nov 2012 11:54:30 +0100 Subject: [PATCH 593/609] drbd: if the replication link breaks during handshake, keep retrying The 8.3.12 commit drbd: Bugfix for the connection behavior fixes a "wasted established connection", if a former connection attempt failed during its early stages. However it opened a window for a regression, if a connection attempt fails during its last stages. 
The result was a terminated receiver thread, that left behind the supposedly transient "C_UNCONNECTED" state. Any later requests to change the connection state fail, as they wait for the connection state to "stabilize". Fix: short circuit and keep retrying to re-establish a new connection, if we don't reach C_WF_REPORT_PARAMS. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9a9d4fd264a..1599a1a6f1f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1051,7 +1051,7 @@ randomize: rcu_read_unlock(); rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); - if (rv < SS_SUCCESS) { + if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) { clear_bit(STATE_SENT, &tconn->flags); return 0; } From f1d6a328bbe63b528721a25251ad8f5f1e997804 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 8 Nov 2012 16:12:31 -0800 Subject: [PATCH 594/609] drbd: use copy_highpage Use copy_highpage() to copy from one page to another.
Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 1ab205a4bf6..8dc29502dc0 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1011,17 +1011,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - void *src, *dest; page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); - dest = kmap_atomic(page); - src = kmap_atomic(b->bm_pages[page_nr]); - memcpy(dest, src, PAGE_SIZE); - kunmap_atomic(src); - kunmap_atomic(dest); + copy_highpage(page, b->bm_pages[page_nr]); bm_store_page_idx(page, page_nr); } else page = b->bm_pages[page_nr]; - bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; /* bio_add_page of a single page to an empty bio will always succeed, From 1f118bc479173bff44aa591bcfa065e68884f2c5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 8 Nov 2012 16:12:22 -0800 Subject: [PATCH 595/609] cciss: cleanup bitops usage - Remove unnecessary correction of bit and address - Use BITS_TO_LONGS macro to calculate bitmap size - Use bitmap_zero() Signed-off-by: Akinobu Mita Cc: Mike Miller Cc: Stephen M. 
Cameron Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b0f553b26d0..bda6d128237 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -978,8 +979,7 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h) i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds); if (i == h->nr_cmds) return NULL; - } while (test_and_set_bit(i & (BITS_PER_LONG - 1), - h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0); + } while (test_and_set_bit(i, h->cmd_pool_bits) != 0); c = h->cmd_pool + i; memset(c, 0, sizeof(CommandList_struct)); cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct); @@ -1046,8 +1046,7 @@ static void cmd_free(ctlr_info_t *h, CommandList_struct *c) int i; i = c - h->cmd_pool; - clear_bit(i & (BITS_PER_LONG - 1), - h->cmd_pool_bits + (i / BITS_PER_LONG)); + clear_bit(i, h->cmd_pool_bits); h->nr_frees++; } @@ -4812,8 +4811,7 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h) { - h->cmd_pool_bits = kmalloc( - DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) * + h->cmd_pool_bits = kmalloc(BITS_TO_LONGS(h->nr_cmds) * sizeof(unsigned long), GFP_KERNEL); h->cmd_pool = pci_alloc_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct), @@ -5068,9 +5066,7 @@ reinit_after_soft_reset: pci_set_drvdata(pdev, h); /* command and error info recs zeroed out before they are used */ - memset(h->cmd_pool_bits, 0, - DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) - * sizeof(unsigned long)); + bitmap_zero(h->cmd_pool_bits, h->nr_cmds); h->num_luns = 0; h->highest_lun = -1; From d48c152a41c8cd6de832397b4ea6f0429ad86318 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 8 Nov 2012 16:12:23 -0800 Subject: [PATCH 596/609] cciss: use check_signature() Use 
check_signature() to find a signature in the mmio address. Signed-off-by: Akinobu Mita Cc: Mike Miller Cc: Stephen M. Cameron Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index bda6d128237..e04c63ec775 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -42,8 +42,8 @@ #include #include #include +#include #include -#include #include #include @@ -4267,10 +4267,7 @@ static void __devinit cciss_find_board_params(ctlr_info_t *h) static inline bool CISS_signature_present(ctlr_info_t *h) { - if ((readb(&h->cfgtable->Signature[0]) != 'C') || - (readb(&h->cfgtable->Signature[1]) != 'I') || - (readb(&h->cfgtable->Signature[2]) != 'S') || - (readb(&h->cfgtable->Signature[3]) != 'S')) { + if (!check_signature(h->cfgtable->Signature, "CISS", 4)) { dev_warn(&h->pdev->dev, "not a valid CISS config table\n"); return false; } From 1ad7e89940d5ac411928189e1a4a01901dbf590f Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 8 Nov 2012 16:12:25 -0800 Subject: [PATCH 597/609] block: store partition_meta_info.uuid as a string This will allow other types of UUID to be stored here, aside from true UUIDs. This also simplifies code that uses this field, since it's usually constructed from a, used as a, or compared to other, strings. Note: A simplistic approach here would be to set uuid_str[36]=0 whenever a /PARTNROFF option was found to be present. However, this modifies the input string, and causes subsequent calls to devt_from_partuuid() not to see the /PARTNROFF option, which causes different results. In order to avoid misleading future maintainers, this parameter is marked const. 
Signed-off-by: Stephen Warren Cc: Tejun Heo Cc: Will Drewry Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/genhd.c | 8 +------- block/partitions/efi.c | 7 +------ include/linux/genhd.h | 8 ++++++-- init/do_mounts.c | 28 +++++++++++++++++----------- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 6cace663a80..b281f3a2d26 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -743,7 +743,6 @@ void __init printk_all_partitions(void) struct hd_struct *part; char name_buf[BDEVNAME_SIZE]; char devt_buf[BDEVT_SIZE]; - char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5]; /* * Don't show empty devices or things that have been @@ -762,16 +761,11 @@ void __init printk_all_partitions(void) while ((part = disk_part_iter_next(&piter))) { bool is_part0 = part == &disk->part0; - uuid_buf[0] = '\0'; - if (part->info) - snprintf(uuid_buf, sizeof(uuid_buf), "%pU", - part->info->uuid); - printk("%s%s %10llu %s %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), (unsigned long long)part_nr_sects_read(part) >> 1 , disk_name(disk, part->partno, name_buf), - uuid_buf); + part->info ? part->info->uuid : ""); if (is_part0) { if (disk->driverfs_dev != NULL && disk->driverfs_dev->driver != NULL) diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 6296b403c67..b62fb88b871 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -620,7 +620,6 @@ int efi_partition(struct parsed_partitions *state) gpt_entry *ptes = NULL; u32 i; unsigned ssz = bdev_logical_block_size(state->bdev) / 512; - u8 unparsed_guid[37]; if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { kfree(gpt); @@ -649,11 +648,7 @@ int efi_partition(struct parsed_partitions *state) state->parts[i + 1].flags = ADDPART_FLAG_RAID; info = &state->parts[i + 1].info; - /* Instead of doing a manual swap to big endian, reuse the - * common ASCII hex format as the interim. 
- */ - efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid); - part_pack_uuid(unparsed_guid, info->uuid); + efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid); /* Naively convert UTF16-LE to 7 bits. */ label_max = min(sizeof(info->volname) - 1, diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4f440b3e89f..79b8bba1936 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -88,10 +88,14 @@ struct disk_stats { }; #define PARTITION_META_INFO_VOLNAMELTH 64 -#define PARTITION_META_INFO_UUIDLTH 16 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH 37 struct partition_meta_info { - u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */ + char uuid[PARTITION_META_INFO_UUIDLTH]; u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; diff --git a/init/do_mounts.c b/init/do_mounts.c index f8a66424360..b28ec581932 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -69,23 +69,28 @@ __setup("ro", readonly); __setup("rw", readwrite); #ifdef CONFIG_BLOCK +struct uuidcmp { + const char *uuid; + int len; +}; + /** * match_dev_by_uuid - callback for finding a partition using its uuid * @dev: device passed in by the caller - * @data: opaque pointer to a 36 byte char array with a UUID + * @data: opaque pointer to the desired struct uuidcmp to match * * Returns 1 if the device matches, and 0 otherwise. 
*/ static int match_dev_by_uuid(struct device *dev, void *data) { - u8 *uuid = data; + struct uuidcmp *cmp = data; struct hd_struct *part = dev_to_part(dev); if (!part->info) goto no_match; - if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid))) - goto no_match; + if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len)) + goto no_match; return 1; no_match: @@ -95,7 +100,7 @@ no_match: /** * devt_from_partuuid - looks up the dev_t of a partition by its UUID - * @uuid: min 36 byte char array containing a hex ascii UUID + * @uuid: char array containing ascii UUID * * The function will return the first partition which contains a matching * UUID value in its partition_meta_info struct. This does not search @@ -106,11 +111,11 @@ no_match: * * Returns the matching dev_t on success or 0 on failure. */ -static dev_t devt_from_partuuid(char *uuid_str) +static dev_t devt_from_partuuid(const char *uuid_str) { dev_t res = 0; + struct uuidcmp cmp; struct device *dev = NULL; - u8 uuid[16]; struct gendisk *disk; struct hd_struct *part; int offset = 0; @@ -118,6 +123,9 @@ static dev_t devt_from_partuuid(char *uuid_str) if (strlen(uuid_str) < 36) goto done; + cmp.uuid = uuid_str; + cmp.len = 36; + /* Check for optional partition number offset attributes. */ if (uuid_str[36]) { char c = 0; @@ -134,10 +142,8 @@ static dev_t devt_from_partuuid(char *uuid_str) } } - /* Pack the requested UUID in the expected format. */ - part_pack_uuid(uuid_str, uuid); - - dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid); + dev = class_find_device(&block_class, NULL, &cmp, + &match_dev_by_uuid); if (!dev) goto done; From 283f8fc03927b0ef42a2faa60a0df5ec8c612edb Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 8 Nov 2012 16:12:27 -0800 Subject: [PATCH 598/609] init: reduce PARTUUID min length to 1 from 36 Reduce the minimum length for a root=PARTUUID= parameter to be considered valid from 36 to 1. 
EFI/GPT partition UUIDs are always exactly 36 characters long, hence the previous limit. However, the next patch will support DOS/MBR UUIDs too, which have a different, shorter, format. Instead of validating any particular length, just ensure that at least some non-empty value was given by the user. Also, consider a missing UUID value to be a parsing error, in the same vein as if /PARTNROFF exists and can't be parsed. As such, make both error cases print a message and disable rootwait. Convert to pr_err while we're at it. Signed-off-by: Stephen Warren Cc: Tejun Heo Cc: Will Drewry Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- init/do_mounts.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/init/do_mounts.c b/init/do_mounts.c index b28ec581932..c950d7c93f9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -119,27 +119,29 @@ static dev_t devt_from_partuuid(const char *uuid_str) struct gendisk *disk; struct hd_struct *part; int offset = 0; - - if (strlen(uuid_str) < 36) - goto done; + bool clear_root_wait = false; + char *slash; cmp.uuid = uuid_str; - cmp.len = 36; + slash = strchr(uuid_str, '/'); /* Check for optional partition number offset attributes. */ - if (uuid_str[36]) { + if (slash) { char c = 0; /* Explicitly fail on poor PARTUUID syntax. 
*/ - if (sscanf(&uuid_str[36], - "/PARTNROFF=%d%c", &offset, &c) != 1) { - printk(KERN_ERR "VFS: PARTUUID= is invalid.\n" - "Expected PARTUUID=[/PARTNROFF=%%d]\n"); - if (root_wait) - printk(KERN_ERR - "Disabling rootwait; root= is invalid.\n"); - root_wait = 0; + if (sscanf(slash + 1, + "PARTNROFF=%d%c", &offset, &c) != 1) { + clear_root_wait = true; goto done; } + cmp.len = slash - uuid_str; + } else { + cmp.len = strlen(uuid_str); + } + + if (!cmp.len) { + clear_root_wait = true; + goto done; } dev = class_find_device(&block_class, NULL, &cmp, @@ -164,6 +166,13 @@ static dev_t devt_from_partuuid(const char *uuid_str) no_offset: put_device(dev); done: + if (clear_root_wait) { + pr_err("VFS: PARTUUID= is invalid.\n" + "Expected PARTUUID=[/PARTNROFF=%%d]\n"); + if (root_wait) + pr_err("Disabling rootwait; root= is invalid.\n"); + root_wait = 0; + } return res; } #endif From d33b98fc82b0908e91fb05ae081acaed7323f9d2 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 8 Nov 2012 16:12:28 -0800 Subject: [PATCH 599/609] block: partition: msdos: provide UUIDs for partitions The MSDOS/MBR partition table includes a 32-bit unique ID, often referred to as the NT disk signature. When combined with a partition number within the table, this can form a unique ID similar in concept to EFI/GPT's partition UUID. 
Constructing and recording this value in struct partition_meta_info allows MSDOS partitions to be referred to on the kernel command-line using the following syntax: root=PARTUUID=0002dd75-01 Signed-off-by: Stephen Warren Cc: Tejun Heo Cc: Will Drewry Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/partitions/msdos.c | 21 +++++++++++++++++++-- init/do_mounts.c | 4 ++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 5f79a6677c6..8752a5d2656 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -94,6 +94,17 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) return ret; } +static void set_info(struct parsed_partitions *state, int slot, + u32 disksig) +{ + struct partition_meta_info *info = &state->parts[slot].info; + + snprintf(info->uuid, sizeof(info->uuid), "%08x-%02x", disksig, + slot); + info->volname[0] = 0; + state->parts[slot].has_info = true; +} + /* * Create devices for each logical partition in an extended partition. 
* The logical partitions form a linked list, with each entry being @@ -106,7 +117,8 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) */ static void parse_extended(struct parsed_partitions *state, - sector_t first_sector, sector_t first_size) + sector_t first_sector, sector_t first_size, + u32 disksig) { struct partition *p; Sector sect; @@ -166,6 +178,7 @@ static void parse_extended(struct parsed_partitions *state, } put_partition(state, state->next, next, size); + set_info(state, state->next, disksig); if (SYS_IND(p) == LINUX_RAID_PARTITION) state->parts[state->next].flags = ADDPART_FLAG_RAID; loopct = 0; @@ -437,6 +450,7 @@ int msdos_partition(struct parsed_partitions *state) struct partition *p; struct fat_boot_sector *fb; int slot; + u32 disksig; data = read_part_sector(state, 0, &sect); if (!data) @@ -491,6 +505,8 @@ int msdos_partition(struct parsed_partitions *state) #endif p = (struct partition *) (data + 0x1be); + disksig = le32_to_cpup((__le32 *)(data + 0x1b8)); + /* * Look for partitions in two passes: * First find the primary and DOS-type extended partitions. @@ -515,11 +531,12 @@ int msdos_partition(struct parsed_partitions *state) put_partition(state, slot, start, n); strlcat(state->pp_buf, " <", PAGE_SIZE); - parse_extended(state, start, size); + parse_extended(state, start, size, disksig); strlcat(state->pp_buf, " >", PAGE_SIZE); continue; } put_partition(state, slot, start, size); + set_info(state, slot, disksig); if (SYS_IND(p) == LINUX_RAID_PARTITION) state->parts[slot].flags = ADDPART_FLAG_RAID; if (SYS_IND(p) == DM6_PARTITION) diff --git a/init/do_mounts.c b/init/do_mounts.c index c950d7c93f9..1d1b6348f90 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -189,6 +189,10 @@ done: * used when disk name of partitioned disk ends on a digit. * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the * unique id of a partition if the partition table provides it. 
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + * filled hex representation of the 32-bit "NT disk signature", and PP + * is a zero-filled hex representation of the 1-based partition number. * 7) PARTUUID=/PARTNROFF= to select a partition in relation to * a partition with a known unique id. * From 4d4f270f1880e52d89a33c944ee86f23d6c85541 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 16 Nov 2012 19:26:48 +0100 Subject: [PATCH 600/609] xen-blkback: move free persistent grants code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the code that frees persistent grants from the red-black tree to a function. This will make it easier for other consumers to move this to a common place. Signed-off-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 68 ++++++++++++++++------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index a05961683ef..74374fb762a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -212,6 +212,39 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, return NULL; } +static void free_persistent_gnts(struct rb_root *root, unsigned int num) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt; + int ret = 0; + int segs_to_unmap = 0; + + foreach_grant(persistent_gnt, root, node) { + BUG_ON(persistent_gnt->handle == + BLKBACK_INVALID_HANDLE); + gnttab_set_unmap_op(&unmap[segs_to_unmap], + (unsigned long) pfn_to_kaddr(page_to_pfn( + persistent_gnt->page)), + GNTMAP_host_map, + persistent_gnt->handle); + + pages[segs_to_unmap] = persistent_gnt->page; + rb_erase(&persistent_gnt->node, root); + 
kfree(persistent_gnt); + num--; + + if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || + !rb_next(&persistent_gnt->node)) { + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); + BUG_ON(ret); + segs_to_unmap = 0; + } + } + BUG_ON(num != 0); +} + /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ @@ -358,11 +391,6 @@ int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; struct xen_vbd *vbd = &blkif->vbd; - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct persistent_gnt *persistent_gnt; - int ret = 0; - int segs_to_unmap = 0; xen_blkif_get(blkif); @@ -391,34 +419,12 @@ int xen_blkif_schedule(void *arg) } /* Free all persistent grant pages */ - if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) { - foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) { - BUG_ON(persistent_gnt->handle == - BLKBACK_INVALID_HANDLE); - gnttab_set_unmap_op(&unmap[segs_to_unmap], - (unsigned long) pfn_to_kaddr(page_to_pfn( - persistent_gnt->page)), - GNTMAP_host_map, - persistent_gnt->handle); + if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) + free_persistent_gnts(&blkif->persistent_gnts, + blkif->persistent_gnt_c); - pages[segs_to_unmap] = persistent_gnt->page; - rb_erase(&persistent_gnt->node, - &blkif->persistent_gnts); - kfree(persistent_gnt); - blkif->persistent_gnt_c--; - - if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || - !rb_next(&persistent_gnt->node)) { - ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); - BUG_ON(ret); - segs_to_unmap = 0; - } - } - } - - BUG_ON(blkif->persistent_gnt_c != 0); BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); + blkif->persistent_gnt_c = 0; if (log_stats) print_stats(blkif); From 07c540a0b5f4674538b57ad85bc9306e44fb45dd Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 16 Nov 2012 19:26:47 +0100 Subject: [PATCH 601/609] xen-blkfront: free allocated page MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free the page allocated for the persistent grant. Signed-off-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index f1de806b0a6..96e9b00db08 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -806,6 +806,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) all_gnts = llist_del_all(&info->persistent_gnts); llist_for_each_entry(persistent_gnt, all_gnts, node) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + __free_page(pfn_to_page(persistent_gnt->pfn)); kfree(persistent_gnt); } info->persistent_gnts_c = 0; From eed8c02e680c04cd737e0a9cef74e68d8eb0cefa Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 30 Nov 2012 11:42:40 +0100 Subject: [PATCH 602/609] wait: add wait_event_lock_irq() interface New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit moves the private wait_event_lock_irq() macro from MD to the regular wait includes, introduces a new macro wait_event_lock_irq_cmd() instead of using the old method of omitting the cmd parameter, which is ugly, and makes use of the new macros in MD. It also introduces the _interruptible_ variant. The new interface is for use when one has a special lock to protect data structures used in the condition, or one also needs to invoke "cmd" before putting the process to sleep. All new macros are expected to be called with the lock taken. The lock is released before sleep and is reacquired afterwards. We will leave the macro with the lock held. Note to DM: IMO this should also fix a theoretical race on the waitqueue when simultaneously using wait_event_lock_irq() and wait_event(), because of the lack of locking around current state setting and wait queue removal. 
Signed-off-by: Lukas Czerner Cc: Neil Brown Cc: David Howells Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- drivers/md/md.c | 2 +- drivers/md/md.h | 26 ------- drivers/md/raid1.c | 15 ++-- drivers/md/raid10.c | 15 ++-- drivers/md/raid5.c | 12 ++-- include/linux/wait.h | 164 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 184 insertions(+), 50 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9ab768acfb6..7e513a38cec 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) spin_lock_irq(&mddev->write_lock); wait_event_lock_irq(mddev->sb_wait, !mddev->flush_bio, - mddev->write_lock, /*nothing*/); + mddev->write_lock); mddev->flush_bio = bio; spin_unlock_irq(&mddev->write_lock); diff --git a/drivers/md/md.h b/drivers/md/md.h index af443ab868d..1e2fc3d9c74 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -551,32 +551,6 @@ struct md_thread { #define THREAD_WAKEUP 0 -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ -} while (0) - static inline void safe_put_page(struct page *p) { if (p) put_page(p); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8034fbd6190..534dd74a2da 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf) /* Wait until no block IO is waiting */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, - conf->resync_lock, 
); + conf->resync_lock); /* block any new IO from starting */ conf->barrier++; @@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf) /* Now wait for all pending IO to complete */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, ); + conf->resync_lock); spin_unlock_irq(&conf->resync_lock); } @@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf) (conf->nr_pending && current->bio_list && !bio_list_empty(current->bio_list)), - conf->resync_lock, - ); + conf->resync_lock); conf->nr_waiting--; } conf->nr_pending++; @@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf) spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, - conf->nr_pending == conf->nr_queued+1, - conf->resync_lock, - flush_pending_writes(conf)); + wait_event_lock_irq_cmd(conf->wait_barrier, + conf->nr_pending == conf->nr_queued+1, + conf->resync_lock, + flush_pending_writes(conf)); spin_unlock_irq(&conf->resync_lock); } static void unfreeze_array(struct r1conf *conf) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 906ccbd0f7d..9a08f621b27 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force) /* Wait until no block IO is waiting (unless 'force') */ wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, - conf->resync_lock, ); + conf->resync_lock); /* block any new IO from starting */ conf->barrier++; @@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force) /* Now wait for all pending IO to complete */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, ); + conf->resync_lock); spin_unlock_irq(&conf->resync_lock); } @@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf) (conf->nr_pending && current->bio_list && !bio_list_empty(current->bio_list)), 
- conf->resync_lock, - ); + conf->resync_lock); conf->nr_waiting--; } conf->nr_pending++; @@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf) spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, - conf->nr_pending == conf->nr_queued+1, - conf->resync_lock, - flush_pending_writes(conf)); + wait_event_lock_irq_cmd(conf->wait_barrier, + conf->nr_pending == conf->nr_queued+1, + conf->resync_lock, + flush_pending_writes(conf)); spin_unlock_irq(&conf->resync_lock); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c5439dce029..2bf617d6f4f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector, do { wait_event_lock_irq(conf->wait_for_stripe, conf->quiesce == 0 || noquiesce, - conf->device_lock, /* nothing */); + conf->device_lock); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { if (!conf->inactive_blocked) @@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector, (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes *3/4) || !conf->inactive_blocked), - conf->device_lock, - ); + conf->device_lock); conf->inactive_blocked = 0; } else init_stripe(sh, sector, previous); @@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, !list_empty(&conf->inactive_list), - conf->device_lock, - ); + conf->device_lock); osh = get_free_stripe(conf); spin_unlock_irq(&conf->device_lock); atomic_set(&nsh->count, 1); @@ -4000,7 +3998,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, conf->quiesce == 0, - conf->device_lock, /* nothing */); + conf->device_lock); atomic_inc(&conf->active_aligned_reads); spin_unlock_irq(&conf->device_lock); @@ -6088,7 +6086,7 @@ static void 
raid5_quiesce(struct mddev *mddev, int state) wait_event_lock_irq(conf->wait_for_stripe, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, - conf->device_lock, /* nothing */); + conf->device_lock); conf->quiesce = 1; spin_unlock_irq(&conf->device_lock); /* allow reshape to continue */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 168dfe122dd..7cb64d4b499 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -550,6 +550,170 @@ do { \ __ret; \ }) + +#define __wait_event_lock_irq(wq, condition, lock, cmd) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_lock_irq_cmd - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd + * and schedule() and reacquired afterwards. + * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. 
+ */ +#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, cmd); \ +} while (0) + +/** + * wait_event_lock_irq - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + */ +#define wait_event_lock_irq(wq, condition, lock) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, ); \ +} while (0) + + +#define __wait_event_interruptible_lock_irq(wq, condition, \ + lock, ret, cmd) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. + * The condition is checked under the lock. This is expected to + * be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd and + * schedule() and reacquired afterwards. 
+ * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ +({ \ + int __ret = 0; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq(wq, condition, \ + lock, __ret, cmd); \ + __ret; \ +}) + +/** + * wait_event_interruptible_lock_irq - sleep until a condition gets true. + * The condition is checked under the lock. This is expected + * to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. 
+ */ +#define wait_event_interruptible_lock_irq(wq, condition, lock) \ +({ \ + int __ret = 0; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq(wq, condition, \ + lock, __ret, ); \ + __ret; \ +}) + + /* * These are the old interfaces to sleep waiting for an event. * They are racy. DO NOT use them, use the wait_event* interfaces above. From 7b5a35225b0d4fd779cf79d7624e63d1957f6c4d Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 30 Nov 2012 11:42:41 +0100 Subject: [PATCH 603/609] loop: Limit the number of requests in the bio list Currently there is no limit on the number of requests in the loop bio list. This can lead to some nasty situations when the caller spawns tons of bio requests taking a huge amount of memory. This is even more obvious with discard where blkdev_issue_discard() will submit all bios for the range and wait for them to finish afterwards. On really big loop devices and a slow backing file system this can lead to an OOM situation as reported by Dave Chinner. With this patch we will wait in loop_make_request() if the number of bios in the loop bio list would exceed 'nr_congestion_on'. We'll wake up the process as we process the bios from the list. Some threshold hysteresis is in place to avoid high frequency oscillation. 
Signed-off-by: Lukas Czerner Reported-by: Dave Chinner Signed-off-by: Jens Axboe --- drivers/block/loop.c | 10 ++++++++++ include/linux/loop.h | 3 +++ 2 files changed, 13 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e9d594fd12c..800aec7927d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -463,6 +463,7 @@ out: */ static void loop_add_bio(struct loop_device *lo, struct bio *bio) { + lo->lo_bio_count++; bio_list_add(&lo->lo_bio_list, bio); } @@ -471,6 +472,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio) */ static struct bio *loop_get_bio(struct loop_device *lo) { + lo->lo_bio_count--; return bio_list_pop(&lo->lo_bio_list); } @@ -489,6 +491,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio) goto out; if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) goto out; + if (lo->lo_bio_count >= q->nr_congestion_on) + wait_event_lock_irq(lo->lo_req_wait, + lo->lo_bio_count < q->nr_congestion_off, + lo->lo_lock); loop_add_bio(lo, old_bio); wake_up(&lo->lo_event); spin_unlock_irq(&lo->lo_lock); @@ -546,6 +552,8 @@ static int loop_thread(void *data) continue; spin_lock_irq(&lo->lo_lock); bio = loop_get_bio(lo); + if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off) + wake_up(&lo->lo_req_wait); spin_unlock_irq(&lo->lo_lock); BUG_ON(!bio); @@ -873,6 +881,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->transfer = transfer_none; lo->ioctl = NULL; lo->lo_sizelimit = 0; + lo->lo_bio_count = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); @@ -1660,6 +1669,7 @@ static int loop_add(struct loop_device **l, int i) lo->lo_number = i; lo->lo_thread = NULL; init_waitqueue_head(&lo->lo_event); + init_waitqueue_head(&lo->lo_req_wait); spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; disk->first_minor = i << part_shift; diff --git a/include/linux/loop.h b/include/linux/loop.h 
index 6492181bcb1..460b60fa7ad 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -53,10 +53,13 @@ struct loop_device { spinlock_t lo_lock; struct bio_list lo_bio_list; + unsigned int lo_bio_count; int lo_state; struct mutex lo_ctl_mutex; struct task_struct *lo_thread; wait_queue_head_t lo_event; + /* wait queue for incoming requests */ + wait_queue_head_t lo_req_wait; struct request_queue *lo_queue; struct gendisk *lo_disk; From 2cecb7309897c872b977a60d53e5de0af4265dd6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Nov 2012 21:20:15 +0100 Subject: [PATCH 604/609] drbd: fixup after wait_event_lock_irq() addition to generic code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling drbd yields: drivers/block/drbd/drbd_state.c: In function ‘_conn_request_state’: drivers/block/drbd/drbd_state.c:1804:5: error: macro "wait_event_lock_irq" passed 4 arguments, but takes just 3 drivers/block/drbd/drbd_state.c:1801:3: error: ‘wait_event_lock_irq’ undeclared (first use in this function) drivers/block/drbd/drbd_state.c:1801:3: note: each undeclared identifier is reported only once for each function it appears in drivers/block/drbd/drbd_state.c: At top level: drivers/block/drbd/drbd_state.c:1734:1: warning: ‘_conn_rq_cond’ defined but not used [-Wunused-function] This is due to drbd having copied the MD definitions of wait_event_lock_irq() as well. Kill them. Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 27 --------------------------- drivers/block/drbd/drbd_state.c | 3 +-- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ef72a72814c..6b51afa1aae 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2334,30 +2334,3 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) } #endif - -/* This is defined in drivers/md/md.h as well. 
Should go into wait.h */ -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ -} while (0) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 69ef35266ba..53bf6182bac 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1800,8 +1800,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ spin_lock_irq(&tconn->req_lock); wait_event_lock_irq(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)), - tconn->req_lock, - ); + tconn->req_lock); clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); if (rv < SS_SUCCESS) goto abort; From 298307ed1d2ac4815b0ce9a5f1c9cc643f4a9aca Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 16 Nov 2012 12:27:41 +0100 Subject: [PATCH 605/609] drbd: Remove obsolete check Smatch complained about this redundant check. The check was introduced on 2006-09-13. On 2007-07-24 the body of the function was enclosed by get_ldev()/put_ldev() reference counting. Since then the check is useless and misleading. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 52de26daa1f..8c13eeb83c5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -840,7 +840,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) } spin_lock_irq(&mdev->ldev->md.uuid_lock); for (i = UI_CURRENT; i < UI_SIZE; i++) - p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; + p->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); spin_unlock_irq(&mdev->ldev->md.uuid_lock); mdev->comm_bm_set = drbd_bm_total_weight(mdev); From 691631c0652bc47e6d20b0d981e23a9025fd794e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 26 Oct 2012 00:41:50 +0200 Subject: [PATCH 606/609] drbd: respect no-md-barriers setting also when changed online via disk-options We need to propagate the configuration into the flag bits, or it won't be effective. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 76bb3a684b8..536f94a1ba5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) else mdev->ldev->md.flags |= MDF_AL_DISABLED; + if (new_disk_conf->md_flushes) + clear_bit(MD_NO_FUA, &mdev->flags); + else + set_bit(MD_NO_FUA, &mdev->flags); + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); drbd_md_sync(mdev); From 13c76aba7846647f86d479293ae0a0adc1ca840a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 22 Nov 2012 17:06:00 +0100 Subject: [PATCH 607/609] drbd: close race between drbd_set_role and drbd_connect drbd_set_role(, R_PRIMARY, ) does the state change to Primary, some more housekeeping, and possibly generates a new UUID set. All of this holding the "state_mutex". The connection handshake involves sending of various state information, including the current data generation UUID set, and two connection state changes from C_WF_CONNECTION to C_WF_REPORT_PARAMS further to a number of different outcomes, resync being one of them. If the connection handshake happens between the state change to Primary and the generation of the new UUIDs, the resync decision based on the old UUID set may be confused, depending on circumstances. Make sure that, before we do the handshake, any promotion to Primary role will either be complete (including the housekeeping stuff), or can see, and serialize with, the ongoing handshake, based on the "STATE_SENT" bit, which is set when we start the handshake, and cleared only when we leave C_WF_REPORT_PARAMS again. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1599a1a6f1f..a9eccfc6079 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1037,6 +1037,16 @@ randomize: rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { kref_get(&mdev->kref); + /* Prevent a race between resync-handshake and + * being promoted to Primary. + * + * Grab and release the state mutex, so we know that any current + * drbd_set_role() is finished, and any incoming drbd_set_role + * will see the STATE_SENT flag, and wait for it to be cleared. + */ + mutex_lock(mdev->state_mutex); + mutex_unlock(mdev->state_mutex); + rcu_read_unlock(); if (discard_my_data) From ef86b77957a97315d43ff72c9b83361ba1a07963 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 6 Dec 2012 10:34:34 +0100 Subject: [PATCH 608/609] drbd: Fix drbdsetup wait-connect, wait-sync etc... 
commands This was introduced when moving the code over from the 8.3 codebase with commit 328e0f125bf41f4f33f684db22015f92cb44fe56 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 536f94a1ba5..2af26fc9528 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3297,11 +3297,12 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) unsigned seq; int err = -ENOMEM; - if (sib->sib_reason == SIB_SYNC_PROGRESS && - time_after(jiffies, mdev->rs_last_bcast + HZ)) - mdev->rs_last_bcast = jiffies; - else - return; + if (sib->sib_reason == SIB_SYNC_PROGRESS) { + if (time_after(jiffies, mdev->rs_last_bcast + HZ)) + mdev->rs_last_bcast = jiffies; + else + return; + } seq = atomic_inc_return(&drbd_genl_seq); msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); From d2ec180c23a5a1bfe34d8638b0342a47c00cf70f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 6 Dec 2012 11:11:04 +0100 Subject: [PATCH 609/609] drbd: update Kconfig to match current dependencies We no longer need the connector. But we need libcrc32c. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Kconfig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index df098378739..7845bd6ee41 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -2,13 +2,14 @@ # DRBD device driver configuration # -comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected" - depends on PROC_FS='n' || INET='n' || CONNECTOR='n' +comment "DRBD disabled because PROC_FS or INET not selected" + depends on PROC_FS='n' || INET='n' config BLK_DEV_DRBD tristate "DRBD Distributed Replicated Block Device support" - depends on PROC_FS && INET && CONNECTOR + depends on PROC_FS && INET select LRU_CACHE + select LIBCRC32C default n help @@ -58,7 +59,8 @@ config DRBD_FAULT_INJECTION 32 data read 64 read ahead 128 kmalloc of bitmap - 256 allocation of EE (epoch_entries) + 256 allocation of peer_requests + 512 insert data corruption on receiving side fault_devs: bitmask of minor numbers fault_rate: frequency in percent