md/raid5: use bad-block log to improve handling of uncorrectable read errors.
If we get an uncorrectable read error, record a bad block rather than failing the device. If such errors (which may be due to known bad blocks) make recovery impossible, record a bad block on the recovering devices or, if that cannot be done, abort the recovery. Because a recovery may now be aborted without any device being failed, RAID5 needs to be taught about recovery_disabled handling. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
31c176ecdf
commit
7f0da59bdc
|
@ -2232,9 +2232,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||||
if (rdev && test_bit(In_sync, &rdev->flags))
|
if (rdev && test_bit(In_sync, &rdev->flags))
|
||||||
/* multiple read failures in one stripe */
|
atomic_inc(&rdev->nr_pending);
|
||||||
md_error(conf->mddev, rdev);
|
else
|
||||||
|
rdev = NULL;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
if (rdev) {
|
||||||
|
if (!rdev_set_badblocks(
|
||||||
|
rdev,
|
||||||
|
sh->sector,
|
||||||
|
STRIPE_SECTORS, 0))
|
||||||
|
md_error(conf->mddev, rdev);
|
||||||
|
rdev_dec_pending(rdev, conf->mddev);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
spin_lock_irq(&conf->device_lock);
|
spin_lock_irq(&conf->device_lock);
|
||||||
/* fail all writes first */
|
/* fail all writes first */
|
||||||
|
@ -2313,6 +2322,41 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
|
||||||
md_wakeup_thread(conf->mddev->thread);
|
md_wakeup_thread(conf->mddev->thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
|
||||||
|
struct stripe_head_state *s)
|
||||||
|
{
|
||||||
|
int abort = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
|
||||||
|
clear_bit(STRIPE_SYNCING, &sh->state);
|
||||||
|
s->syncing = 0;
|
||||||
|
/* There is nothing more to do for sync/check/repair.
|
||||||
|
* For recover we need to record a bad block on all
|
||||||
|
* non-sync devices, or abort the recovery
|
||||||
|
*/
|
||||||
|
if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
|
||||||
|
return;
|
||||||
|
/* During recovery devices cannot be removed, so locking and
|
||||||
|
* refcounting of rdevs is not needed
|
||||||
|
*/
|
||||||
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
|
mdk_rdev_t *rdev = conf->disks[i].rdev;
|
||||||
|
if (!rdev
|
||||||
|
|| test_bit(Faulty, &rdev->flags)
|
||||||
|
|| test_bit(In_sync, &rdev->flags))
|
||||||
|
continue;
|
||||||
|
if (!rdev_set_badblocks(rdev, sh->sector,
|
||||||
|
STRIPE_SECTORS, 0))
|
||||||
|
abort = 1;
|
||||||
|
}
|
||||||
|
if (abort) {
|
||||||
|
conf->recovery_disabled = conf->mddev->recovery_disabled;
|
||||||
|
set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* fetch_block - checks the given member device to see if its data needs
|
/* fetch_block - checks the given member device to see if its data needs
|
||||||
* to be read or computed to satisfy a request.
|
* to be read or computed to satisfy a request.
|
||||||
*
|
*
|
||||||
|
@ -3067,11 +3111,8 @@ static void handle_stripe(struct stripe_head *sh)
|
||||||
*/
|
*/
|
||||||
if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
|
if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
|
||||||
handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
|
handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
|
||||||
if (s.failed > conf->max_degraded && s.syncing) {
|
if (s.failed > conf->max_degraded && s.syncing)
|
||||||
md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
|
handle_failed_sync(conf, sh, &s);
|
||||||
clear_bit(STRIPE_SYNCING, &sh->state);
|
|
||||||
s.syncing = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* might be able to return some write requests if the parity blocks
|
* might be able to return some write requests if the parity blocks
|
||||||
|
@ -4976,6 +5017,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
|
||||||
* isn't possible.
|
* isn't possible.
|
||||||
*/
|
*/
|
||||||
if (!test_bit(Faulty, &rdev->flags) &&
|
if (!test_bit(Faulty, &rdev->flags) &&
|
||||||
|
mddev->recovery_disabled != conf->recovery_disabled &&
|
||||||
!has_failed(conf) &&
|
!has_failed(conf) &&
|
||||||
number < conf->raid_disks) {
|
number < conf->raid_disks) {
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
|
@ -5004,6 +5046,9 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
int first = 0;
|
int first = 0;
|
||||||
int last = conf->raid_disks - 1;
|
int last = conf->raid_disks - 1;
|
||||||
|
|
||||||
|
if (mddev->recovery_disabled == conf->recovery_disabled)
|
||||||
|
return -EBUSY;
|
||||||
|
|
||||||
if (has_failed(conf))
|
if (has_failed(conf))
|
||||||
/* no point adding a device */
|
/* no point adding a device */
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
|
@ -399,7 +399,7 @@ struct raid5_private_data {
|
||||||
* (fresh device added).
|
* (fresh device added).
|
||||||
* Cleared when a sync completes.
|
* Cleared when a sync completes.
|
||||||
*/
|
*/
|
||||||
|
int recovery_disabled;
|
||||||
/* per cpu variables */
|
/* per cpu variables */
|
||||||
struct raid5_percpu {
|
struct raid5_percpu {
|
||||||
struct page *spare_page; /* Used when checking P/Q in raid6 */
|
struct page *spare_page; /* Used when checking P/Q in raid6 */
|
||||||
|
|
Reference in New Issue