dlm: fix unlock balance warnings
The in_recovery rw_semaphore has always been acquired and released by different threads by design. To work around the "BUG: bad unlock balance detected!" messages, adjust things so the dlm_recoverd thread always does both down_write and up_write. Signed-off-by: David Teigland <teigland@redhat.com>
This commit is contained in:
parent
6ad2291624
commit
475f230c60
|
@ -604,6 +604,7 @@ struct dlm_ls {
|
||||||
struct idr ls_recover_idr;
|
struct idr ls_recover_idr;
|
||||||
spinlock_t ls_recover_idr_lock;
|
spinlock_t ls_recover_idr_lock;
|
||||||
wait_queue_head_t ls_wait_general;
|
wait_queue_head_t ls_wait_general;
|
||||||
|
wait_queue_head_t ls_recover_lock_wait;
|
||||||
struct mutex ls_clear_proc_locks;
|
struct mutex ls_clear_proc_locks;
|
||||||
|
|
||||||
struct list_head ls_root_list; /* root resources */
|
struct list_head ls_root_list; /* root resources */
|
||||||
|
@ -616,15 +617,40 @@ struct dlm_ls {
|
||||||
char ls_name[1];
|
char ls_name[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define LSFL_WORK 0
|
/*
|
||||||
#define LSFL_RUNNING 1
|
* LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
|
||||||
#define LSFL_RECOVERY_STOP 2
|
* that they should abort what they're doing so new recovery can be started.
|
||||||
#define LSFL_RCOM_READY 3
|
*
|
||||||
#define LSFL_RCOM_WAIT 4
|
* LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
|
||||||
#define LSFL_UEVENT_WAIT 5
|
* should do down_write() on the in_recovery rw_semaphore. (doing down_write
|
||||||
#define LSFL_TIMEWARN 6
|
* within dlm_ls_stop causes complaints about the lock acquired/released
|
||||||
#define LSFL_CB_DELAY 7
|
* in different contexts.)
|
||||||
#define LSFL_NODIR 8
|
*
|
||||||
|
* LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
|
||||||
|
* It sets this after it is done with down_write() on the in_recovery
|
||||||
|
* rw_semaphore and clears it after it has released the rw_semaphore.
|
||||||
|
*
|
||||||
|
* LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
|
||||||
|
* should begin recovery of the lockspace.
|
||||||
|
*
|
||||||
|
* LSFL_RUNNING - set when normal locking activity is enabled.
|
||||||
|
* dlm_ls_stop() clears this to tell dlm locking routines that they should
|
||||||
|
* quit what they are doing so recovery can run. dlm_recoverd sets
|
||||||
|
* this after recovery is finished.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define LSFL_RECOVER_STOP 0
|
||||||
|
#define LSFL_RECOVER_DOWN 1
|
||||||
|
#define LSFL_RECOVER_LOCK 2
|
||||||
|
#define LSFL_RECOVER_WORK 3
|
||||||
|
#define LSFL_RUNNING 4
|
||||||
|
|
||||||
|
#define LSFL_RCOM_READY 5
|
||||||
|
#define LSFL_RCOM_WAIT 6
|
||||||
|
#define LSFL_UEVENT_WAIT 7
|
||||||
|
#define LSFL_TIMEWARN 8
|
||||||
|
#define LSFL_CB_DELAY 9
|
||||||
|
#define LSFL_NODIR 10
|
||||||
|
|
||||||
/* much of this is just saving user space pointers associated with the
|
/* much of this is just saving user space pointers associated with the
|
||||||
lock that we pass back to the user lib with an ast */
|
lock that we pass back to the user lib with an ast */
|
||||||
|
@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)
|
||||||
|
|
||||||
static inline int dlm_recovery_stopped(struct dlm_ls *ls)
|
static inline int dlm_recovery_stopped(struct dlm_ls *ls)
|
||||||
{
|
{
|
||||||
return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int dlm_no_directory(struct dlm_ls *ls)
|
static inline int dlm_no_directory(struct dlm_ls *ls)
|
||||||
|
|
|
@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster,
|
||||||
INIT_LIST_HEAD(&ls->ls_root_list);
|
INIT_LIST_HEAD(&ls->ls_root_list);
|
||||||
init_rwsem(&ls->ls_root_sem);
|
init_rwsem(&ls->ls_root_sem);
|
||||||
|
|
||||||
down_write(&ls->ls_in_recovery);
|
|
||||||
|
|
||||||
spin_lock(&lslist_lock);
|
spin_lock(&lslist_lock);
|
||||||
ls->ls_create_count = 1;
|
ls->ls_create_count = 1;
|
||||||
list_add(&ls->ls_list, &lslist);
|
list_add(&ls->ls_list, &lslist);
|
||||||
|
@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* needs to find ls in lslist */
|
init_waitqueue_head(&ls->ls_recover_lock_wait);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Once started, dlm_recoverd first looks for ls in lslist, then
|
||||||
|
* initializes ls_in_recovery as locked in "down" mode. We need
|
||||||
|
* to wait for the wakeup from dlm_recoverd because in_recovery
|
||||||
|
* has to start out in down mode.
|
||||||
|
*/
|
||||||
|
|
||||||
error = dlm_recoverd_start(ls);
|
error = dlm_recoverd_start(ls);
|
||||||
if (error) {
|
if (error) {
|
||||||
log_error(ls, "can't start dlm_recoverd %d", error);
|
log_error(ls, "can't start dlm_recoverd %d", error);
|
||||||
goto out_callback;
|
goto out_callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wait_event(ls->ls_recover_lock_wait,
|
||||||
|
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
|
||||||
|
|
||||||
ls->ls_kobj.kset = dlm_kset;
|
ls->ls_kobj.kset = dlm_kset;
|
||||||
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
|
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
|
||||||
"%s", ls->ls_name);
|
"%s", ls->ls_name);
|
||||||
|
|
|
@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls)
|
||||||
down_write(&ls->ls_recv_active);
|
down_write(&ls->ls_recv_active);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Abort any recovery that's in progress (see RECOVERY_STOP,
|
* Abort any recovery that's in progress (see RECOVER_STOP,
|
||||||
* dlm_recovery_stopped()) and tell any other threads running in the
|
* dlm_recovery_stopped()) and tell any other threads running in the
|
||||||
* dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
|
* dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
spin_lock(&ls->ls_recover_lock);
|
spin_lock(&ls->ls_recover_lock);
|
||||||
set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||||
new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
|
new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
|
||||||
ls->ls_recover_seq++;
|
ls->ls_recover_seq++;
|
||||||
spin_unlock(&ls->ls_recover_lock);
|
spin_unlock(&ls->ls_recover_lock);
|
||||||
|
@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
|
||||||
* when recovery is complete.
|
* when recovery is complete.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (new)
|
if (new) {
|
||||||
down_write(&ls->ls_in_recovery);
|
set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
|
||||||
|
wake_up_process(ls->ls_recoverd_task);
|
||||||
|
wait_event(ls->ls_recover_lock_wait,
|
||||||
|
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The recoverd suspend/resume makes sure that dlm_recoverd (if
|
* The recoverd suspend/resume makes sure that dlm_recoverd (if
|
||||||
* running) has noticed RECOVERY_STOP above and quit processing the
|
* running) has noticed RECOVER_STOP above and quit processing the
|
||||||
* previous recovery.
|
* previous recovery.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls)
|
||||||
kfree(rv_old);
|
kfree(rv_old);
|
||||||
}
|
}
|
||||||
|
|
||||||
dlm_recoverd_kick(ls);
|
set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
|
||||||
|
wake_up_process(ls->ls_recoverd_task);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
|
|
|
@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
|
||||||
|
|
||||||
spin_lock(&ls->ls_recover_lock);
|
spin_lock(&ls->ls_recover_lock);
|
||||||
status = ls->ls_recover_status;
|
status = ls->ls_recover_status;
|
||||||
stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||||
seq = ls->ls_recover_seq;
|
seq = ls->ls_recover_seq;
|
||||||
spin_unlock(&ls->ls_recover_lock);
|
spin_unlock(&ls->ls_recover_lock);
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
|
||||||
set_bit(LSFL_RUNNING, &ls->ls_flags);
|
set_bit(LSFL_RUNNING, &ls->ls_flags);
|
||||||
/* unblocks processes waiting to enter the dlm */
|
/* unblocks processes waiting to enter the dlm */
|
||||||
up_write(&ls->ls_in_recovery);
|
up_write(&ls->ls_in_recovery);
|
||||||
|
clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
|
||||||
error = 0;
|
error = 0;
|
||||||
}
|
}
|
||||||
spin_unlock(&ls->ls_recover_lock);
|
spin_unlock(&ls->ls_recover_lock);
|
||||||
|
@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
|
||||||
rv = ls->ls_recover_args;
|
rv = ls->ls_recover_args;
|
||||||
ls->ls_recover_args = NULL;
|
ls->ls_recover_args = NULL;
|
||||||
if (rv && ls->ls_recover_seq == rv->seq)
|
if (rv && ls->ls_recover_seq == rv->seq)
|
||||||
clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
|
clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
|
||||||
spin_unlock(&ls->ls_recover_lock);
|
spin_unlock(&ls->ls_recover_lock);
|
||||||
|
|
||||||
if (rv) {
|
if (rv) {
|
||||||
|
@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
down_write(&ls->ls_in_recovery);
|
||||||
|
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
|
||||||
|
wake_up(&ls->ls_recover_lock_wait);
|
||||||
|
|
||||||
while (!kthread_should_stop()) {
|
while (!kthread_should_stop()) {
|
||||||
set_current_state(TASK_INTERRUPTIBLE);
|
set_current_state(TASK_INTERRUPTIBLE);
|
||||||
if (!test_bit(LSFL_WORK, &ls->ls_flags))
|
if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
|
||||||
|
!test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
|
||||||
schedule();
|
schedule();
|
||||||
set_current_state(TASK_RUNNING);
|
set_current_state(TASK_RUNNING);
|
||||||
|
|
||||||
if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
|
if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
|
||||||
|
down_write(&ls->ls_in_recovery);
|
||||||
|
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
|
||||||
|
wake_up(&ls->ls_recover_lock_wait);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
|
||||||
do_ls_recovery(ls);
|
do_ls_recovery(ls);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
|
||||||
|
up_write(&ls->ls_in_recovery);
|
||||||
|
|
||||||
dlm_put_lockspace(ls);
|
dlm_put_lockspace(ls);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dlm_recoverd_kick(struct dlm_ls *ls)
|
|
||||||
{
|
|
||||||
set_bit(LSFL_WORK, &ls->ls_flags);
|
|
||||||
wake_up_process(ls->ls_recoverd_task);
|
|
||||||
}
|
|
||||||
|
|
||||||
int dlm_recoverd_start(struct dlm_ls *ls)
|
int dlm_recoverd_start(struct dlm_ls *ls)
|
||||||
{
|
{
|
||||||
struct task_struct *p;
|
struct task_struct *p;
|
||||||
|
|
|
@ -14,7 +14,6 @@
|
||||||
#ifndef __RECOVERD_DOT_H__
|
#ifndef __RECOVERD_DOT_H__
|
||||||
#define __RECOVERD_DOT_H__
|
#define __RECOVERD_DOT_H__
|
||||||
|
|
||||||
void dlm_recoverd_kick(struct dlm_ls *ls);
|
|
||||||
void dlm_recoverd_stop(struct dlm_ls *ls);
|
void dlm_recoverd_stop(struct dlm_ls *ls);
|
||||||
int dlm_recoverd_start(struct dlm_ls *ls);
|
int dlm_recoverd_start(struct dlm_ls *ls);
|
||||||
void dlm_recoverd_suspend(struct dlm_ls *ls);
|
void dlm_recoverd_suspend(struct dlm_ls *ls);
|
||||||
|
|
Reference in New Issue