drbd: factored tl_restart() out of tl_clear().
If IO was frozen for a temporary network outage, resend the content of the transfer-log into the newly established connection. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
parent
2a80699f80
commit
11b58e73a3
5 changed files with 103 additions and 40 deletions
|
@ -1138,6 +1138,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
|
||||||
extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
|
extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
|
||||||
unsigned int set_size);
|
unsigned int set_size);
|
||||||
extern void tl_clear(struct drbd_conf *mdev);
|
extern void tl_clear(struct drbd_conf *mdev);
|
||||||
|
enum drbd_req_event;
|
||||||
|
extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
|
||||||
extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
|
extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
|
||||||
extern void drbd_free_sock(struct drbd_conf *mdev);
|
extern void drbd_free_sock(struct drbd_conf *mdev);
|
||||||
extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
|
extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
|
||||||
|
|
|
@ -333,6 +333,77 @@ bail:
|
||||||
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
|
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* _tl_restart() - Walks the transfer log, and applies an action to all requests
|
||||||
|
* @mdev: DRBD device.
|
||||||
|
* @what: The action/event to perform with all request objects
|
||||||
|
*
|
||||||
|
* @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
|
||||||
|
* restart_frozen_disk_io.
|
||||||
|
*/
|
||||||
|
static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
|
||||||
|
{
|
||||||
|
struct drbd_tl_epoch *b, *tmp, **pn;
|
||||||
|
struct list_head *le, *tle;
|
||||||
|
struct drbd_request *req;
|
||||||
|
int rv, n_writes, n_reads;
|
||||||
|
|
||||||
|
b = mdev->oldest_tle;
|
||||||
|
pn = &mdev->oldest_tle;
|
||||||
|
while (b) {
|
||||||
|
n_writes = 0;
|
||||||
|
n_reads = 0;
|
||||||
|
list_for_each_safe(le, tle, &b->requests) {
|
||||||
|
req = list_entry(le, struct drbd_request, tl_requests);
|
||||||
|
rv = _req_mod(req, what);
|
||||||
|
|
||||||
|
n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
|
||||||
|
n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
|
||||||
|
}
|
||||||
|
tmp = b->next;
|
||||||
|
|
||||||
|
if (n_writes + n_reads) {
|
||||||
|
if (what == resend) {
|
||||||
|
b->n_writes = n_writes;
|
||||||
|
if (b->w.cb == NULL) {
|
||||||
|
b->w.cb = w_send_barrier;
|
||||||
|
inc_ap_pending(mdev);
|
||||||
|
set_bit(CREATE_BARRIER, &mdev->flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
drbd_queue_work(&mdev->data.work, &b->w);
|
||||||
|
}
|
||||||
|
pn = &b->next;
|
||||||
|
} else {
|
||||||
|
/* there could still be requests on that ring list,
|
||||||
|
* in case local io is still pending */
|
||||||
|
list_del(&b->requests);
|
||||||
|
|
||||||
|
/* dec_ap_pending corresponding to queue_barrier.
|
||||||
|
* the newest barrier may not have been queued yet,
|
||||||
|
* in which case w.cb is still NULL. */
|
||||||
|
if (b->w.cb != NULL)
|
||||||
|
dec_ap_pending(mdev);
|
||||||
|
|
||||||
|
if (b == mdev->newest_tle) {
|
||||||
|
/* recycle, but reinit! */
|
||||||
|
D_ASSERT(tmp == NULL);
|
||||||
|
INIT_LIST_HEAD(&b->requests);
|
||||||
|
INIT_LIST_HEAD(&b->w.list);
|
||||||
|
b->w.cb = NULL;
|
||||||
|
b->br_number = net_random();
|
||||||
|
b->n_writes = 0;
|
||||||
|
|
||||||
|
*pn = b;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*pn = tmp;
|
||||||
|
kfree(b);
|
||||||
|
}
|
||||||
|
b = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
|
* tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
|
||||||
|
@ -344,48 +415,12 @@ bail:
|
||||||
*/
|
*/
|
||||||
void tl_clear(struct drbd_conf *mdev)
|
void tl_clear(struct drbd_conf *mdev)
|
||||||
{
|
{
|
||||||
struct drbd_tl_epoch *b, *tmp;
|
|
||||||
struct list_head *le, *tle;
|
struct list_head *le, *tle;
|
||||||
struct drbd_request *r;
|
struct drbd_request *r;
|
||||||
int new_initial_bnr = net_random();
|
|
||||||
|
|
||||||
spin_lock_irq(&mdev->req_lock);
|
spin_lock_irq(&mdev->req_lock);
|
||||||
|
|
||||||
b = mdev->oldest_tle;
|
_tl_restart(mdev, connection_lost_while_pending);
|
||||||
while (b) {
|
|
||||||
list_for_each_safe(le, tle, &b->requests) {
|
|
||||||
r = list_entry(le, struct drbd_request, tl_requests);
|
|
||||||
/* It would be nice to complete outside of spinlock.
|
|
||||||
* But this is easier for now. */
|
|
||||||
_req_mod(r, connection_lost_while_pending);
|
|
||||||
}
|
|
||||||
tmp = b->next;
|
|
||||||
|
|
||||||
/* there could still be requests on that ring list,
|
|
||||||
* in case local io is still pending */
|
|
||||||
list_del(&b->requests);
|
|
||||||
|
|
||||||
/* dec_ap_pending corresponding to queue_barrier.
|
|
||||||
* the newest barrier may not have been queued yet,
|
|
||||||
* in which case w.cb is still NULL. */
|
|
||||||
if (b->w.cb != NULL)
|
|
||||||
dec_ap_pending(mdev);
|
|
||||||
|
|
||||||
if (b == mdev->newest_tle) {
|
|
||||||
/* recycle, but reinit! */
|
|
||||||
D_ASSERT(tmp == NULL);
|
|
||||||
INIT_LIST_HEAD(&b->requests);
|
|
||||||
INIT_LIST_HEAD(&b->w.list);
|
|
||||||
b->w.cb = NULL;
|
|
||||||
b->br_number = new_initial_bnr;
|
|
||||||
b->n_writes = 0;
|
|
||||||
|
|
||||||
mdev->oldest_tle = b;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
kfree(b);
|
|
||||||
b = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we expect this list to be empty. */
|
/* we expect this list to be empty. */
|
||||||
D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
|
D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
|
||||||
|
@ -406,6 +441,13 @@ void tl_clear(struct drbd_conf *mdev)
|
||||||
spin_unlock_irq(&mdev->req_lock);
|
spin_unlock_irq(&mdev->req_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
|
||||||
|
{
|
||||||
|
spin_lock_irq(&mdev->req_lock);
|
||||||
|
_tl_restart(mdev, what);
|
||||||
|
spin_unlock_irq(&mdev->req_lock);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* cl_wide_st_chg() - TRUE if the state change is a cluster wide one
|
* cl_wide_st_chg() - TRUE if the state change is a cluster wide one
|
||||||
* @mdev: DRBD device.
|
* @mdev: DRBD device.
|
||||||
|
|
|
@ -776,9 +776,6 @@ static int drbd_connect(struct drbd_conf *mdev)
|
||||||
|
|
||||||
D_ASSERT(!mdev->data.socket);
|
D_ASSERT(!mdev->data.socket);
|
||||||
|
|
||||||
if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags))
|
|
||||||
dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n");
|
|
||||||
|
|
||||||
if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
|
if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
|
||||||
return -2;
|
return -2;
|
||||||
|
|
||||||
|
|
|
@ -634,6 +634,20 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
||||||
/* else: done by handed_over_to_network */
|
/* else: done by handed_over_to_network */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case resend:
|
||||||
|
/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
|
||||||
|
before the connection loss; only P_BARRIER_ACK was missing.
|
||||||
|
Throwing them out of the TL here by pretending we got a BARRIER_ACK
|
||||||
|
TODO: Either resync them, or ensure peer was not rebooted. */
|
||||||
|
if (!(req->rq_state & RQ_NET_OK)) {
|
||||||
|
if (req->w.cb) {
|
||||||
|
drbd_queue_work(&mdev->data.work, &req->w);
|
||||||
|
rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* else, fall through to barrier_acked */
|
||||||
|
|
||||||
case barrier_acked:
|
case barrier_acked:
|
||||||
if (!(req->rq_state & RQ_WRITE))
|
if (!(req->rq_state & RQ_WRITE))
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -104,6 +104,7 @@ enum drbd_req_event {
|
||||||
read_ahead_completed_with_error,
|
read_ahead_completed_with_error,
|
||||||
write_completed_with_error,
|
write_completed_with_error,
|
||||||
completed_ok,
|
completed_ok,
|
||||||
|
resend,
|
||||||
nothing, /* for tracing only */
|
nothing, /* for tracing only */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -206,6 +207,13 @@ enum drbd_req_state_bits {
|
||||||
|
|
||||||
#define RQ_WRITE (1UL << __RQ_WRITE)
|
#define RQ_WRITE (1UL << __RQ_WRITE)
|
||||||
|
|
||||||
|
/* For waking up the frozen transfer log, _req_mod() has to return whether the request
|
||||||
|
should be counted in the epoch object. */
|
||||||
|
#define MR_WRITE_SHIFT 0
|
||||||
|
#define MR_WRITE (1 << MR_WRITE_SHIFT)
|
||||||
|
#define MR_READ_SHIFT 1
|
||||||
|
#define MR_READ (1 << MR_READ_SHIFT)
|
||||||
|
|
||||||
/* epoch entries */
|
/* epoch entries */
|
||||||
static inline
|
static inline
|
||||||
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
|
||||||
|
|
Reference in a new issue