dect
/
linux-2.6
Archived
13
0
Fork 0

Various bug fixes for ext4. Perhaps the most serious bug fixed is one

which could cause file system corruptions when performing file punch
 operations.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIcBAABCAAGBQJQ374OAAoJENNvdpvBGATwEGAP/jKUwjQhBZiF0k9dg1kQ5eTz
 bdli4fy1vxrEMIOym8IZa4nBQJVCkArwRgjc28gCBD6k9u6X3GPa26vUydsoPfP6
 odPdc9c9HtsbYQGuaq1SohID5HfjxHewTcUmCs4X4SpGcSurUcT7eQYWqSuIxFHR
 0nKk8NO4EcWh2uqIoGPrc8QpSdor0DXXYYjZmHCeVLH1n6PyoMsnrFMfO9KqMLUL
 vNR54CX9n1GRTfAfJNkNzcwfs8IfNkDUyv5hFpDh15tLltogU0TqnlAl3vSeZGSx
 vVfhwHmQTK/bJyC3YaoRZqq9CQJVk2f/OTBpJDFY/USaapuitJd6vqbmh7NiRNAN
 LaKmFt99MPfwyjEhIA7+J0LCTraAxc536q43oWWK5dAJhWI7DW0lbHARVeQTixNy
 KJ1Lp0pmmz1mX8/lugOnK1SPBF525kTaoiz2bWqg4oQgn7mBzUlgj+EV22/6Rq83
 TpKOKstl4BiZi8t5AhmFiwqtknCDiT5vUKQNy2kuM/oXtPJID/lM/TJbR5viYD3l
 AH3Ef7xj61CynFZ0oBeraGwtXc2BHJpJdWz+8uj0/VhFfC+uNUYapSLFwyiAVZKO
 xxaItT3ylfKpa0AWK6HBc2SLuL72SCHAPks06YKFtSyHtr5C8SCcafxU2DSOSi7K
 VrhkcH6STa77Br7a1ORt
 =9R/D
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 bug fixes from Ted Ts'o:
 "Various bug fixes for ext4.  Perhaps the most serious bug fixed is one
  which could cause file system corruptions when performing file punch
  operations."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: avoid hang when mounting non-journal filesystems with orphan list
  ext4: lock i_mutex when truncating orphan inodes
  ext4: do not try to write superblock on ro remount w/o journal
  ext4: include journal blocks in df overhead calcs
  ext4: remove unaligned AIO warning printk
  ext4: fix an incorrect comment about i_mutex
  ext4: fix deadlock in journal_unmap_buffer()
  ext4: split off ext4_journalled_invalidatepage()
  jbd2: fix assertion failure in jbd2_journal_flush()
  ext4: check dioread_nolock on remount
  ext4: fix extent tree corruption caused by hole punch
This commit is contained in:
Linus Torvalds 2013-01-02 09:57:34 -08:00
commit 5439ca6b8f
9 changed files with 152 additions and 58 deletions

View File

@ -2226,13 +2226,14 @@ errout:
* removes index from the index block. * removes index from the index block.
*/ */
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path) struct ext4_ext_path *path, int depth)
{ {
int err; int err;
ext4_fsblk_t leaf; ext4_fsblk_t leaf;
/* free index block */ /* free index block */
path--; depth--;
path = path + depth;
leaf = ext4_idx_pblock(path->p_idx); leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) { if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, NULL, leaf, 1, ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
while (--depth >= 0) {
if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
break;
path--;
err = ext4_ext_get_access(handle, inode, path);
if (err)
break;
path->p_idx->ei_block = (path+1)->p_idx->ei_block;
err = ext4_ext_dirty(handle, inode, path);
if (err)
break;
}
return err; return err;
} }
@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* if this leaf is free, then we should /* if this leaf is free, then we should
* remove it from index block above */ * remove it from index block above */
if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
err = ext4_ext_rm_idx(handle, inode, path + depth); err = ext4_ext_rm_idx(handle, inode, path, depth);
out: out:
return err; return err;
@ -2802,7 +2816,7 @@ again:
/* index is empty, remove it; /* index is empty, remove it;
* handle must be already prepared by the * handle must be already prepared by the
* truncatei_leaf() */ * truncatei_leaf() */
err = ext4_ext_rm_idx(handle, inode, path + i); err = ext4_ext_rm_idx(handle, inode, path, i);
} }
/* root level has p_bh == NULL, brelse() eats this */ /* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh); brelse(path[i].p_bh);

View File

@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
/* Unaligned direct AIO must be serialized; see comment above */ /* Unaligned direct AIO must be serialized; see comment above */
if (unaligned_aio) { if (unaligned_aio) {
static unsigned long unaligned_warn_time;
/* Warn about this once per day */
if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
ext4_msg(inode->i_sb, KERN_WARNING,
"Unaligned AIO/DIO on inode %ld by %s; "
"performance will be poor.",
inode->i_ino, current->comm);
mutex_lock(ext4_aio_mutex(inode)); mutex_lock(ext4_aio_mutex(inode));
ext4_unwritten_wait(inode); ext4_unwritten_wait(inode);
} }

View File

@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync)
* *
* What we do is just kick off a commit and wait on it. This will snapshot the * What we do is just kick off a commit and wait on it. This will snapshot the
* inode to disk. * inode to disk.
*
* i_mutex lock is held when entering and exiting this function
*/ */
int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)

View File

@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs
static void ext4_invalidatepage(struct page *page, unsigned long offset) static void ext4_invalidatepage(struct page *page, unsigned long offset)
{ {
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
trace_ext4_invalidatepage(page, offset); trace_ext4_invalidatepage(page, offset);
/* /*
@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
*/ */
if (ext4_should_dioread_nolock(page->mapping->host)) if (ext4_should_dioread_nolock(page->mapping->host))
ext4_invalidatepage_free_endio(page, offset); ext4_invalidatepage_free_endio(page, offset);
/* No journalling happens on data buffers when this function is used */
WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
block_invalidatepage(page, offset);
}
static int __ext4_journalled_invalidatepage(struct page *page,
unsigned long offset)
{
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
trace_ext4_journalled_invalidatepage(page, offset);
/* /*
* If it's a full truncate we just forget about the pending dirtying * If it's a full truncate we just forget about the pending dirtying
*/ */
if (offset == 0) if (offset == 0)
ClearPageChecked(page); ClearPageChecked(page);
if (journal) return jbd2_journal_invalidatepage(journal, page, offset);
jbd2_journal_invalidatepage(journal, page, offset); }
else
block_invalidatepage(page, offset); /* Wrapper for aops... */
static void ext4_journalled_invalidatepage(struct page *page,
unsigned long offset)
{
WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
} }
static int ext4_releasepage(struct page *page, gfp_t wait) static int ext4_releasepage(struct page *page, gfp_t wait)
@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.write_end = ext4_journalled_write_end, .write_end = ext4_journalled_write_end,
.set_page_dirty = ext4_journalled_set_page_dirty, .set_page_dirty = ext4_journalled_set_page_dirty,
.bmap = ext4_bmap, .bmap = ext4_bmap,
.invalidatepage = ext4_invalidatepage, .invalidatepage = ext4_journalled_invalidatepage,
.releasepage = ext4_releasepage, .releasepage = ext4_releasepage,
.direct_IO = ext4_direct_IO, .direct_IO = ext4_direct_IO,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
@ -4304,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
return err; return err;
} }
/*
* In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
* buffers that are attached to a page stradding i_size and are undergoing
* commit. In that case we have to wait for commit to finish and try again.
*/
static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
struct page *page;
unsigned offset;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = 0;
int ret;
offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
/*
* All buffers in the last page remain valid? Then there's nothing to
* do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
* blocksize case
*/
if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
return;
while (1) {
page = find_lock_page(inode->i_mapping,
inode->i_size >> PAGE_CACHE_SHIFT);
if (!page)
return;
ret = __ext4_journalled_invalidatepage(page, offset);
unlock_page(page);
page_cache_release(page);
if (ret != -EBUSY)
return;
commit_tid = 0;
read_lock(&journal->j_state_lock);
if (journal->j_committing_transaction)
commit_tid = journal->j_committing_transaction->t_tid;
read_unlock(&journal->j_state_lock);
if (commit_tid)
jbd2_log_wait_commit(journal, commit_tid);
}
}
/* /*
* ext4_setattr() * ext4_setattr()
* *
@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} }
if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_valid & ATTR_SIZE) {
if (attr->ia_size != i_size_read(inode)) { if (attr->ia_size != inode->i_size) {
truncate_setsize(inode, attr->ia_size); loff_t oldsize = inode->i_size;
/* Inode size will be reduced, wait for dio in flight.
* Temporarily disable dioread_nolock to prevent i_size_write(inode, attr->ia_size);
* livelock. */ /*
* Blocks are going to be removed from the inode. Wait
* for dio in flight. Temporarily disable
* dioread_nolock to prevent livelock.
*/
if (orphan) { if (orphan) {
ext4_inode_block_unlocked_dio(inode); if (!ext4_should_journal_data(inode)) {
inode_dio_wait(inode); ext4_inode_block_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode); inode_dio_wait(inode);
ext4_inode_resume_unlocked_dio(inode);
} else
ext4_wait_for_tail_page_commit(inode);
} }
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, oldsize, inode->i_size);
} }
ext4_truncate(inode); ext4_truncate(inode);
} }

View File

@ -2648,7 +2648,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
struct ext4_iloc iloc; struct ext4_iloc iloc;
int err = 0; int err = 0;
if (!EXT4_SB(inode->i_sb)->s_journal) if ((!EXT4_SB(inode->i_sb)->s_journal) &&
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
return 0; return 0;
mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);

View File

@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb,
unsigned int *journal_ioprio, unsigned int *journal_ioprio,
int is_remount) int is_remount)
{ {
#ifdef CONFIG_QUOTA
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
#endif
char *p; char *p;
substring_t args[MAX_OPT_ARGS]; substring_t args[MAX_OPT_ARGS];
int token; int token;
@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb,
} }
} }
#endif #endif
if (test_opt(sb, DIOREAD_NOLOCK)) {
int blocksize =
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if (blocksize < PAGE_CACHE_SIZE) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"dioread_nolock if block size != PAGE_SIZE");
return 0;
}
}
return 1; return 1;
} }
@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size); __func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n", jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size); inode->i_ino, inode->i_size);
mutex_lock(&inode->i_mutex);
ext4_truncate(inode); ext4_truncate(inode);
mutex_unlock(&inode->i_mutex);
nr_truncates++; nr_truncates++;
} else { } else {
ext4_msg(sb, KERN_DEBUG, ext4_msg(sb, KERN_DEBUG,
@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb)
memset(buf, 0, PAGE_SIZE); memset(buf, 0, PAGE_SIZE);
cond_resched(); cond_resched();
} }
/* Add the journal blocks as well */
if (sbi->s_journal)
overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
sbi->s_overhead = overhead; sbi->s_overhead = overhead;
smp_wmb(); smp_wmb();
free_page((unsigned long) buf); free_page((unsigned long) buf);
@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
clear_opt(sb, DELALLOC); clear_opt(sb, DELALLOC);
} }
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (test_opt(sb, DIOREAD_NOLOCK)) {
if (blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"dioread_nolock if block size != PAGE_SIZE");
goto failed_mount;
}
}
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
goto failed_mount; goto failed_mount;
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE || if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) { blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR, ext4_msg(sb, KERN_ERR,
@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
} }
ext4_setup_system_zone(sb); ext4_setup_system_zone(sb);
if (sbi->s_journal == NULL) if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
ext4_commit_super(sb, 1); ext4_commit_super(sb, 1);
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA

View File

@ -209,7 +209,8 @@ repeat:
if (!new_transaction) if (!new_transaction)
goto alloc_transaction; goto alloc_transaction;
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (!journal->j_running_transaction) { if (!journal->j_running_transaction &&
!journal->j_barrier_count) {
jbd2_get_transaction(journal, new_transaction); jbd2_get_transaction(journal, new_transaction);
new_transaction = NULL; new_transaction = NULL;
} }
@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
BUFFER_TRACE(bh, "entry"); BUFFER_TRACE(bh, "entry");
retry:
/* /*
* It is safe to proceed here without the j_list_lock because the * It is safe to proceed here without the j_list_lock because the
* buffers cannot be stolen by try_to_free_buffers as long as we are * buffers cannot be stolen by try_to_free_buffers as long as we are
@ -1934,14 +1934,11 @@ retry:
* for commit and try again. * for commit and try again.
*/ */
if (partial_page) { if (partial_page) {
tid_t tid = journal->j_committing_transaction->t_tid;
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid); return -EBUSY;
goto retry;
} }
/* /*
* OK, buffer won't be reachable after truncate. We just set * OK, buffer won't be reachable after truncate. We just set
@ -2002,21 +1999,23 @@ zap_buffer_unlocked:
* @page: page to flush * @page: page to flush
* @offset: length of page to invalidate. * @offset: length of page to invalidate.
* *
* Reap page buffers containing data after offset in page. * Reap page buffers containing data after offset in page. Can return -EBUSY
* * if buffers are part of the committing transaction and the page is straddling
* i_size. Caller then has to wait for current commit and try again.
*/ */
void jbd2_journal_invalidatepage(journal_t *journal, int jbd2_journal_invalidatepage(journal_t *journal,
struct page *page, struct page *page,
unsigned long offset) unsigned long offset)
{ {
struct buffer_head *head, *bh, *next; struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0; unsigned int curr_off = 0;
int may_free = 1; int may_free = 1;
int ret = 0;
if (!PageLocked(page)) if (!PageLocked(page))
BUG(); BUG();
if (!page_has_buffers(page)) if (!page_has_buffers(page))
return; return 0;
/* We will potentially be playing with lists other than just the /* We will potentially be playing with lists other than just the
* data lists (especially for journaled data mode), so be * data lists (especially for journaled data mode), so be
@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
if (offset <= curr_off) { if (offset <= curr_off) {
/* This block is wholly outside the truncation point */ /* This block is wholly outside the truncation point */
lock_buffer(bh); lock_buffer(bh);
may_free &= journal_unmap_buffer(journal, bh, ret = journal_unmap_buffer(journal, bh, offset > 0);
offset > 0);
unlock_buffer(bh); unlock_buffer(bh);
if (ret < 0)
return ret;
may_free &= ret;
} }
curr_off = next_off; curr_off = next_off;
bh = next; bh = next;
@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
if (may_free && try_to_free_buffers(page)) if (may_free && try_to_free_buffers(page))
J_ASSERT(!page_has_buffers(page)); J_ASSERT(!page_has_buffers(page));
} }
return 0;
} }
/* /*

View File

@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *,
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
extern void journal_sync_buffer (struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *);
extern void jbd2_journal_invalidatepage(journal_t *, extern int jbd2_journal_invalidatepage(journal_t *,
struct page *, unsigned long); struct page *, unsigned long);
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_stop(handle_t *);

View File

@ -451,7 +451,7 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage,
TP_ARGS(page) TP_ARGS(page)
); );
TRACE_EVENT(ext4_invalidatepage, DECLARE_EVENT_CLASS(ext4_invalidatepage_op,
TP_PROTO(struct page *page, unsigned long offset), TP_PROTO(struct page *page, unsigned long offset),
TP_ARGS(page, offset), TP_ARGS(page, offset),
@ -477,6 +477,18 @@ TRACE_EVENT(ext4_invalidatepage,
(unsigned long) __entry->index, __entry->offset) (unsigned long) __entry->index, __entry->offset)
); );
DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage,
TP_PROTO(struct page *page, unsigned long offset),
TP_ARGS(page, offset)
);
DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage,
TP_PROTO(struct page *page, unsigned long offset),
TP_ARGS(page, offset)
);
TRACE_EVENT(ext4_discard_blocks, TRACE_EVENT(ext4_discard_blocks,
TP_PROTO(struct super_block *sb, unsigned long long blk, TP_PROTO(struct super_block *sb, unsigned long long blk,
unsigned long long count), unsigned long long count),