From 15a2015fbc692e1c97d7ce12d96e077f5ae7ea6d Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Sat, 5 Nov 2011 22:06:31 -0700
Subject: [PATCH 1/3] ceph: fix iput race when queueing inode work

If we queue a work item that calls iput(), make sure we ihold() before
attempting to queue work. Otherwise our queued work might miraculously run
before we notice the queue_work() succeeded and call ihold(), allowing the
inode to be destroyed.

That is, instead of

	if (queue_work(...))
		ihold();

we need to do

	ihold();
	if (!queue_work(...))
		iput();

Reported-by: Amon Ott
Signed-off-by: Sage Weil
---
 fs/ceph/inode.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e392bfce84a..116f36502f1 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1328,12 +1328,13 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
  */
 void ceph_queue_writeback(struct inode *inode)
 {
+	ihold(inode);
 	if (queue_work(ceph_inode_to_client(inode)->wb_wq,
 		       &ceph_inode(inode)->i_wb_work)) {
 		dout("ceph_queue_writeback %p\n", inode);
-		ihold(inode);
 	} else {
 		dout("ceph_queue_writeback %p failed\n", inode);
+		iput(inode);
 	}
 }

@@ -1353,12 +1354,13 @@ static void ceph_writeback_work(struct work_struct *work)
  */
 void ceph_queue_invalidate(struct inode *inode)
 {
+	ihold(inode);
 	if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
 		       &ceph_inode(inode)->i_pg_inv_work)) {
 		dout("ceph_queue_invalidate %p\n", inode);
-		ihold(inode);
 	} else {
 		dout("ceph_queue_invalidate %p failed\n", inode);
+		iput(inode);
 	}
 }

@@ -1434,13 +1436,14 @@ void ceph_queue_vmtruncate(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);

+	ihold(inode);
 	if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
 		       &ci->i_vmtruncate_work)) {
 		dout("ceph_queue_vmtruncate %p\n", inode);
-		ihold(inode);
 	} else {
 		dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
 		     inode, ci->i_truncate_pending);
+		iput(inode);
 	}
 }

From 774ac21da76f5c3018428725074e27a3fd40b128 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 11 Nov 2011 09:48:08 -0800
Subject: [PATCH 2/3] ceph: initialize root dentry

Set up d_fsdata on the root dentry. This fixes a NULL pointer dereference
in ceph_d_prune on umount. It also means we can eventually strip out all of
the conditional checks on d_fsdata because it is now set unconditionally
(prior to setting up the d_ops).

Fix the ceph_d_prune debug print while we're here.

Signed-off-by: Sage Weil
---
 fs/ceph/dir.c | 2 +-
 fs/ceph/super.c | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2abd0dfad7f..bca3948e9db 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1143,7 +1143,7 @@ static void ceph_d_prune(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di;

-	dout("d_release %p\n", dentry);
+	dout("ceph_d_prune %p\n", dentry);

 	/* do we have a valid parent? */
 	if (!dentry->d_parent || IS_ROOT(dentry))
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a90846fac75..8dc73a594a9 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -638,10 +638,12 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 	if (err == 0) {
 		dout("open_root_inode success\n");
 		if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
-		    fsc->sb->s_root == NULL)
+		    fsc->sb->s_root == NULL) {
 			root = d_alloc_root(req->r_target_inode);
-		else
+			ceph_init_dentry(root);
+		} else {
 			root = d_obtain_alias(req->r_target_inode);
+		}
 		req->r_target_inode = NULL;
 		dout("open_root_inode success, root dentry is %p\n", root);
 	} else {
From 224736d9113ab4a7cf3f05c05377492bd99b4b02 Mon Sep 17 00:00:00 2001
From: Stratos Psomadakis
Date: Thu, 10 Nov 2011 15:45:37 +0200
Subject: [PATCH 3/3] libceph: Allocate larger oid buffer in request msgs

ceph_osd_request struct allocates a 40-byte buffer for object names.
RBD image names can be up to 96 chars long (100 with the .rbd suffix),
which results in the object name for the image being truncated, and a
subsequent map failure.

Increase the oid buffer in request messages, in order to avoid the
truncation.

Signed-off-by: Stratos Psomadakis
Signed-off-by: Sage Weil
---
 include/linux/ceph/osd_client.h | 8 +++++++-
 net/ceph/osd_client.c | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index f88eacb111d..7c05ac202d9 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -10,6 +10,12 @@
 #include "osdmap.h"
 #include "messenger.h"

+/*
+ * Maximum object name size
+ * (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100)
+ */
+#define MAX_OBJ_NAME_SIZE 100
+
 struct ceph_msg;
 struct ceph_snap_context;
 struct ceph_osd_request;
@@ -75,7 +81,7 @@ struct ceph_osd_request {
 	struct inode *r_inode; /* for use by callbacks */
 	void *r_priv; /* ditto */

-	char r_oid[40]; /* object name */
+	char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */
 	int r_oid_len;
 	unsigned long r_stamp; /* send OR check time */

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 733e46008b8..f4f3f58f523 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -244,7 +244,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 		ceph_pagelist_init(req->r_trail);
 	}
 	/* create request message; allow space for oid */
-	msg_size += 40;
+	msg_size += MAX_OBJ_NAME_SIZE;
 	if (snapc)
 		msg_size += sizeof(u64) * snapc->num_snaps;
 	if (use_mempool)