From 61600ef8483924039dcdec8b4717ca32bd3353c6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 28 May 2012 14:44:30 +0800 Subject: [PATCH 1/5] ceph: check PG_Private flag before accessing page->private I got lots of NULL pointer dereference Oops when compiling kernel on ceph. The bug is because the kernel page migration routine replaces some pages in the page cache with new pages, these new pages' private can be non-zero. Signed-off-by: Zheng Yan Signed-off-by: Sage Weil (cherry picked from commit 28c0254ede13ab575d2df5c6585ed3d4817c3e6b) --- fs/ceph/addr.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 173b1d22e59..8b67304e4b8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -54,7 +54,12 @@ (CONGESTION_ON_THRESH(congestion_kb) - \ (CONGESTION_ON_THRESH(congestion_kb) >> 2)) - +static inline struct ceph_snap_context *page_snap_context(struct page *page) +{ + if (PagePrivate(page)) + return (void *)page->private; + return NULL; +} /* * Dirty a page. Optimistically adjust accounting, on the assumption @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) { struct inode *inode; struct ceph_inode_info *ci; - struct ceph_snap_context *snapc = (void *)page->private; + struct ceph_snap_context *snapc = page_snap_context(page); BUG_ON(!PageLocked(page)); - BUG_ON(!page->private); BUG_ON(!PagePrivate(page)); BUG_ON(!page->mapping); @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g) struct inode *inode = page->mapping ? page->mapping->host : NULL; dout("%p releasepage %p idx %lu\n", inode, page, page->index); WARN_ON(PageDirty(page)); - WARN_ON(page->private); WARN_ON(PagePrivate(page)); return 0; } @@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) osdc = &fsc->client->osdc; /* verify this is a writeable snap context */ - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc == NULL) { dout("writepage %p page %p not dirty?\n", inode, page); goto out; @@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) oldest = get_oldest_context(inode, &snap_size); if (snapc->seq > oldest->seq) { dout("writepage %p page %p snapc %p not writeable - noop\n", - inode, page, (void *)page->private); + inode, page, snapc); /* we should only noop if called by kswapd */ WARN_ON((current->flags & PF_MEMALLOC) == 0); ceph_put_snap_context(oldest); @@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req, clear_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); - ceph_put_snap_context((void *)page->private); + ceph_put_snap_context(page_snap_context(page)); page->private = 0; ClearPagePrivate(page); dout("unlocking %d %p\n", i, page); @@ -795,7 +798,7 @@ get_more_pages: } /* only if matching snap context */ - pgsnapc = (void *)page->private; + pgsnapc = page_snap_context(page); if (pgsnapc->seq > snapc->seq) { dout("page snapc %p %lld > oldest %p %lld\n", pgsnapc, pgsnapc->seq, snapc, snapc->seq); @@ -984,7 +987,7 @@ retry_locked: BUG_ON(!ci->i_snap_realm); down_read(&mdsc->snap_rwsem); BUG_ON(!ci->i_snap_realm->cached_context); - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc && snapc != ci->i_head_snapc) { /* * this page is already dirty in another (older) snap From 680584fab05efff732b5ae16ad601ba994d7b505 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 4 Jun 2012 14:43:32 -0500 Subject: [PATCH 2/5] libceph: osd_client: don't drop reply reference too early In ceph_osdc_release_request(), a reference to the r_reply message is dropped. But just after that, that same message is revoked if it was in use to receive an incoming reply. Reorder these so we are sure we hold a reference until we're actually done with the message. Signed-off-by: Alex Elder Reviewed-by: Sage Weil (cherry picked from commit ab8cb34a4b2f60281a4b18b1f1ad23bc2313d91b) --- net/ceph/osd_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1ffebed5ce0..13538da41dd 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -139,8 +139,6 @@ void ceph_osdc_release_request(struct kref *kref) if (req->r_request) ceph_msg_put(req->r_request); - if (req->r_reply) - ceph_msg_put(req->r_reply); if (req->r_con_filling_msg) { dout("release_request revoking pages %p from con %p\n", req->r_pages, req->r_con_filling_msg); @@ -148,6 +146,8 @@ void ceph_osdc_release_request(struct kref *kref) req->r_reply); ceph_con_put(req->r_con_filling_msg); } + if (req->r_reply) + ceph_msg_put(req->r_reply); if (req->r_own_pages) ceph_release_page_vector(req->r_pages, req->r_num_pages); From 88ed6ea0b295f8e2383d599a04027ec596cdf97b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 31 May 2012 20:22:18 -0700 Subject: [PATCH 3/5] libceph: use con get/put ops from osd_client There were a few direct calls to ceph_con_{get,put}() instead of the con ops from osd_client.c. This is a bug since those ops aren't defined to be ceph_con_get/put. This breaks refcounting on the ceph_osd structs that contain the ceph_connections, and could lead to all manner of strangeness. The purpose of the ->get and ->put methods in a ceph connection are to allow the connection to indicate it has a reference to something external to the messaging system, *not* to indicate something external has a reference to the connection. [elder@inktank.com: added that last sentence] Signed-off-by: Sage Weil Reviewed-by: Alex Elder (cherry picked from commit 0d47766f14211a73eaf54cab234db134ece79f49) --- net/ceph/osd_client.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 13538da41dd..ca59e66c978 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -144,7 +144,7 @@ void ceph_osdc_release_request(struct kref *kref) req->r_pages, req->r_con_filling_msg); ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); - ceph_con_put(req->r_con_filling_msg); + req->r_con_filling_msg->ops->put(req->r_con_filling_msg); } if (req->r_reply) ceph_msg_put(req->r_reply); @@ -1216,7 +1216,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, if (req->r_con_filling_msg == con && req->r_reply == msg) { dout(" dropping con_filling_msg ref %p\n", con); req->r_con_filling_msg = NULL; - ceph_con_put(con); + con->ops->put(con); } if (!req->r_got_reply) { @@ -2028,7 +2028,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, dout("get_reply revoking msg %p from old con %p\n", req->r_reply, req->r_con_filling_msg); ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); - ceph_con_put(req->r_con_filling_msg); + req->r_con_filling_msg->ops->put(req->r_con_filling_msg); req->r_con_filling_msg = NULL; } @@ -2063,7 +2063,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, #endif } *skip = 0; - req->r_con_filling_msg = ceph_con_get(con); + req->r_con_filling_msg = con->ops->get(con); dout("get_reply tid %lld %p\n", tid, m); out: From b132cf4c733f91bb4dd2277ea049243cf16e8b66 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 6 Jun 2012 19:35:55 -0500 Subject: [PATCH 4/5] rbd: Clear ceph_msg->bio_iter for retransmitted message The bug can cause NULL pointer dereference in write_partial_msg_pages Signed-off-by: Zheng Yan Reviewed-by: Alex Elder (cherry picked from commit 43643528cce60ca184fe8197efa8e8da7c89a037) --- net/ceph/messenger.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 524f4e4f598..b332c3d7605 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -563,6 +563,10 @@ static void prepare_write_message(struct ceph_connection *con) m->hdr.seq = cpu_to_le64(++con->out_seq); m->needs_out_seq = false; } +#ifdef CONFIG_BLOCK + else + m->bio_iter = NULL; +#endif dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", m, con->out_seq, le16_to_cpu(m->hdr.type), From 642c0dbde32f34baa7886e988a067089992adc8f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 10 Jun 2012 20:43:56 -0700 Subject: [PATCH 5/5] libceph: flush msgr queue during mon_client shutdown We need to flush the msgr workqueue during mon_client shutdown to ensure that any work affecting our embedded ceph_connection is finished so that we can be safely destroyed. Previously, we were flushing the work queue after osd_client shutdown and before mon_client shutdown to ensure that any osd connection refs to authorizers are flushed. Remove the redundant flush, and document in the comment that the mon_client flush is needed to cover that case as well. Signed-off-by: Sage Weil Reviewed-by: Alex Elder (cherry picked from commit f3dea7edd3d449fe7a6d402c1ce56a294b985261) --- net/ceph/ceph_common.c | 7 ------- net/ceph/mon_client.c | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a776f751edb..ba4323bce0e 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -504,13 +504,6 @@ void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_osdc_stop(&client->osdc); - /* - * make sure osd connections close out before destroying the - * auth module, which is needed to free those connections' - * ceph_authorizers. - */ - ceph_msgr_flush(); - ceph_monc_stop(&client->monc); ceph_debugfs_client_cleanup(client); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 10d6008d31f..d0649a9655b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -847,6 +847,14 @@ void ceph_monc_stop(struct ceph_mon_client *monc) mutex_unlock(&monc->mutex); + /* + * flush msgr queue before we destroy ourselves to ensure that: + * - any work that references our embedded con is finished. + * - any osd_client or other work that may reference an authorizer + * finishes before we shut down the auth subsystem. + */ + ceph_msgr_flush(); + ceph_auth_destroy(monc->auth); ceph_msg_put(monc->m_auth);