From d5feaea364281a7e9b80b4712e790ab908d61711 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 30 Aug 2012 18:52:54 +0200 Subject: [PATCH 1/9] s390/mm: fix deadlock in unmap_hugepage_range() git commit cd2934a3 moved the flush_tlb_range() within __unmap_hugepage_range() inside the mm->page_table_lock, which triggered a deadlock in s390 tlb flushing code. __tlb_flush_mm_cond() also tries to acquire the mm->page_table_lock, but that is not needed because all callers already have mm->mmap_sem or mm->page_table_lock, so it can be safely removed to fix the deadlock. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlbflush.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 9fde315f3a7..1d8fe2b17ef 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -90,12 +90,10 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) static inline void __tlb_flush_mm_cond(struct mm_struct * mm) { - spin_lock(&mm->page_table_lock); if (mm->context.flush_mm) { __tlb_flush_mm(mm); mm->context.flush_mm = 0; } - spin_unlock(&mm->page_table_lock); } /* From 9016083b7bc4829c4f97f37a1a3102e0f5a37692 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 5 Sep 2012 14:07:59 +0200 Subject: [PATCH 2/9] s390/hugetlb: use direct TLB flushing for hugetlbfs pages huge_ptep_get_and_clear() is either missing a TLB invalidation or an mm->context.attach_count check. Since the attach_count logic was introduced with normal ptes in mind, let's just use direct TLB flushing for hugetlbfs pages. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/hugetlb.h | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 799ed0f1643..2d6e6e38056 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -66,16 +66,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return pte; } -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - mm->context.flush_mm = 1; - pmd_clear((pmd_t *) ptep); - return pte; -} - static inline void __pmd_csp(pmd_t *pmdp) { register unsigned long reg2 asm("2") = pmd_val(*pmdp); @@ -117,6 +107,15 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, __pmd_csp(pmdp); } +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get(ptep); + + huge_ptep_invalidate(mm, addr, ptep); + return pte; +} + #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ ({ \ int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ @@ -131,10 +130,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, ({ \ pte_t __pte = huge_ptep_get(__ptep); \ if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ + huge_ptep_invalidate(__mm, __addr, __ptep); \ set_huge_pte_at(__mm, __addr, __ptep, \ huge_pte_wrprotect(__pte)); \ } \ From 1f08be80bef6f7a3faaa728db836b47ff742f41f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 5 Sep 2012 14:18:22 +0200 Subject: [PATCH 3/9] s390/dasd: move wake_up call Ensure that all work is done when the process waiting for a dasd 
state change is woken up. With this change it is safe to assume that after a userspace-triggered state change and a udev settle invocation there are no unexpected users of a dasd device. Acked-by: Stefan Weinhuber Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 15370a2c5ff..6498d15f874 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -534,11 +534,11 @@ static void dasd_change_state(struct dasd_device *device) if (rc) device->target = device->state; - if (device->state == device->target) - wake_up(&dasd_init_waitq); - /* let user-space know that the device status changed */ kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); + + if (device->state == device->target) + wake_up(&dasd_init_waitq); } /* From 43d0be75af8d05654b88d1da494cf292714fbdec Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 5 Sep 2012 14:19:42 +0200 Subject: [PATCH 4/9] s390/cio: fix IO subchannel event race If the subchannel event function is called from IRQ context and we observe that the subchannel in question is gone, we flag the attached device as not operational and schedule the event function to be called again from process context, where the subchannel gets deregistered. However, if the subchannel has reappeared by the time the event function gets called from process context, we would do nothing and leave the device in the not-operational state. Recognize this case in sch_get_action and trigger reexamination of the subchannel/device. Acked-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index ed25c8740a9..e8e1a108cdf 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1426,6 +1426,8 @@ static enum io_sch_action sch_get_action(struct subchannel *sch) return IO_SCH_REPROBE; if (cdev->online) return IO_SCH_VERIFY; + if (cdev->private->state == DEV_STATE_NOT_OPER) + return IO_SCH_UNREG_ATTACH; return IO_SCH_NOP; } From 3368ba25bf387109b24732c1e270c628f20e345d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 5 Sep 2012 14:20:41 +0200 Subject: [PATCH 5/9] s390/cio: invalidate cdev pointer before deregistration Make sure that the cdev pointer for IO subchannels is set to NULL when we deregister the device (and release its last reference). This fixes a bug where another process could operate on an already freed ccw device. Acked-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e8e1a108cdf..fc916f5d731 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1521,11 +1521,14 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) goto out; break; case IO_SCH_UNREG_ATTACH: + spin_lock_irqsave(sch->lock, flags); if (cdev->private->flags.resuming) { /* Device will be handled later. */ rc = 0; - goto out; + goto out_unlock; } + sch_set_cdev(sch, NULL); + spin_unlock_irqrestore(sch->lock, flags); /* Unregister ccw device.
*/ ccw_device_unregister(cdev); break; From db2e1f43e7b2320d0450fe34e5ac4ac4d8c7708d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 6 Sep 2012 09:44:59 +0200 Subject: [PATCH 6/9] s390/hwcaps: do not report high gprs for 31 bit kernel The bit for high gprs in the AT_HWCAP auxiliary vector field and the highgprs tag in the output of /proc/cpuinfo should not be set for 31 bit kernels. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f86c81e13c3..40b57693de3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -974,11 +974,13 @@ static void __init setup_hwcaps(void) if (MACHINE_HAS_HPAGE) elf_hwcap |= HWCAP_S390_HPAGE; +#if defined(CONFIG_64BIT) /* * 64-bit register support for 31-bit processes * HWCAP_S390_HIGH_GPRS is bit 9. */ elf_hwcap |= HWCAP_S390_HIGH_GPRS; +#endif get_cpu_id(&cpu_id); switch (cpu_id.machine) { From 4db84d4f07b87c452c6000e0595dc9570ed21b41 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 10 Sep 2012 16:43:26 +0200 Subject: [PATCH 7/9] s390/mm: fix user access page-table walk code The s390 page-table walk code, used for user copy and futex, currently cannot handle huge pages. As far as user copy is concerned, that is not really a problem because those functions will only be used on old hardware that has no huge page support. But the futex code will also use the page-table walk functions on current hardware when user space runs in primary space mode. So, if a futex sits in a huge page, the futex operation on it will result in a page fault loop or even data corruption. This patch adds the code for resolving huge page mappings in the user access page-table walk code on s390. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/lib/uaccess_pt.c | 142 ++++++++++++++++--------------- 1 file changed, 63 insertions(+), 79 deletions(-) diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 60ee2b88379..2d37bb861fa 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -2,69 +2,82 @@ * User access functions based on page table walks for enhanced * system layout without hardware support. * - * Copyright IBM Corp. 2006 + * Copyright IBM Corp. 2006, 2012 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) */ #include <linux/errno.h> #include <linux/hardirq.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <asm/uaccess.h> #include <asm/futex.h> #include "uaccess.h" -static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) + +/* + * Returns kernel address for user virtual address. If the returned address is + * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the address + * contains the (negative) exception code.
+ */ +static __always_inline unsigned long follow_table(struct mm_struct *mm, + unsigned long addr, int write) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; + pte_t *ptep; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return (pte_t *) 0x3a; + return -0x3aUL; pud = pud_offset(pgd, addr); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return (pte_t *) 0x3b; + return -0x3bUL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return (pte_t *) 0x10; + if (pmd_none(*pmd)) + return -0x10UL; + if (pmd_huge(*pmd)) { + if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO)) + return -0x04UL; + return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK); + } + if (unlikely(pmd_bad(*pmd))) + return -0x10UL; - return pte_offset_map(pmd, addr); + ptep = pte_offset_map(pmd, addr); + if (!pte_present(*ptep)) + return -0x11UL; + if (write && !pte_write(*ptep)) + return -0x04UL; + + return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); } static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, size_t n, int write_user) { struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, size; - pte_t *pte; + unsigned long offset, done, size, kaddr; void *from, *to; done = 0; retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) + kaddr = follow_table(mm, uaddr, write_user); + if (IS_ERR_VALUE(kaddr)) goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } else if (write_user && !pte_write(*pte)) { - pte = (pte_t *) 0x04; - goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE - 1); + offset = uaddr & ~PAGE_MASK; size = min(n - done, PAGE_SIZE - offset); if (write_user) { - to = (void *)((pfn << PAGE_SHIFT) + offset); + to = (void *) kaddr; from = kptr + done; } else { - from = (void *)((pfn << PAGE_SHIFT) + offset); + from = (void *) kaddr; to = kptr + done; } memcpy(to, from, size); @@ -75,7 +88,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, write_user)) + if (__handle_fault(uaddr, -kaddr, write_user)) return n - done; goto retry; } @@ -84,27 +97,22 @@ fault: * Do DAT for user address by page table walk, return kernel address. * This function needs to be called with current->mm->page_table_lock held. 
*/ -static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) +static __always_inline unsigned long __dat_user_addr(unsigned long uaddr, + int write) { struct mm_struct *mm = current->mm; - unsigned long pfn; - pte_t *pte; + unsigned long kaddr; int rc; retry: - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) + kaddr = follow_table(mm, uaddr, write); + if (IS_ERR_VALUE(kaddr)) goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } - pfn = pte_pfn(*pte); - return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); + return kaddr; fault: spin_unlock(&mm->page_table_lock); - rc = __handle_fault(uaddr, (unsigned long) pte, 0); + rc = __handle_fault(uaddr, -kaddr, write); spin_lock(&mm->page_table_lock); if (!rc) goto retry; @@ -159,11 +167,9 @@ static size_t clear_user_pt(size_t n, void __user *to) static size_t strnlen_user_pt(size_t count, const char __user *src) { - char *addr; unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, len; - pte_t *pte; + unsigned long offset, done, len, kaddr; size_t len_str; if (segment_eq(get_fs(), KERNEL_DS)) @@ -172,19 +178,13 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) + kaddr = follow_table(mm, uaddr, 0); + if (IS_ERR_VALUE(kaddr)) goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE-1); - addr = (char *)(pfn << PAGE_SHIFT) + offset; + offset = uaddr & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); - len_str = strnlen(addr, len); + len_str = strnlen((char *) kaddr, len); done += len_str; uaddr += len_str; } while ((len_str == len) && (done < count)); @@ -192,7 +192,7 @@ retry: return done + 1; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, 0)) + if (__handle_fault(uaddr, -kaddr, 0)) return 0; goto retry; } @@ -225,11 +225,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to, const void __user *from) { struct mm_struct *mm = current->mm; - unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, - uaddr, done, size, error_code; + unsigned long offset_max, uaddr, done, size, error_code; unsigned long uaddr_from = (unsigned long) from; unsigned long uaddr_to = (unsigned long) to; - pte_t *pte_from, *pte_to; + unsigned long kaddr_to, kaddr_from; int write_user; if (segment_eq(get_fs(), KERNEL_DS)) { @@ -242,38 +241,23 @@ retry: do { write_user = 0; uaddr = uaddr_from; - pte_from = follow_table(mm, uaddr_from); - error_code = (unsigned long) pte_from; - if (error_code < 0x1000) + kaddr_from = follow_table(mm, uaddr_from, 0); + error_code = kaddr_from; + if (IS_ERR_VALUE(error_code)) goto fault; - if (!pte_present(*pte_from)) { - error_code = 0x11; - goto fault; - } write_user = 1; uaddr = uaddr_to; - pte_to = follow_table(mm, uaddr_to); - error_code = (unsigned long) pte_to; - if (error_code < 0x1000) + kaddr_to = follow_table(mm, uaddr_to, 1); + error_code = (unsigned long) kaddr_to; + if (IS_ERR_VALUE(error_code)) goto fault; - if (!pte_present(*pte_to)) { - error_code = 0x11; - goto fault; - } else if (!pte_write(*pte_to)) { - error_code = 0x04; - goto fault; - } - pfn_from = pte_pfn(*pte_from); - pfn_to = pte_pfn(*pte_to); - offset_from = uaddr_from & (PAGE_SIZE-1); - offset_to = uaddr_from & (PAGE_SIZE-1); - offset_max = max(offset_from, 
offset_to); + offset_max = max(uaddr_from & ~PAGE_MASK, + uaddr_to & ~PAGE_MASK); size = min(n - done, PAGE_SIZE - offset_max); - memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, - (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); + memcpy((void *) kaddr_to, (void *) kaddr_from, size); done += size; uaddr_from += size; uaddr_to += size; @@ -282,7 +266,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, error_code, write_user)) + if (__handle_fault(uaddr, -error_code, write_user)) return n - done; goto retry; } @@ -341,7 +325,7 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(&current->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(&current->mm->page_table_lock); return -EFAULT; } @@ -378,7 +362,7 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(&current->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(&current->mm->page_table_lock); return -EFAULT; From 12d7b1078bb374fc3e2955b9f2815415a66157b6 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 11 Sep 2012 15:10:58 +0200 Subject: [PATCH 8/9] s390/dasd: fix pathgroup race If a new path is available, we need to verify the path data. If it is the first path for a device, the stop bits are removed after path verification. If a pathgroup is established, we need to set system characteristics for the LCU; therefore, I/O has to be started. If the device is stopped, the set system characteristics worker may block the path verification worker, and the device stays blocked. Turn on failfast for the set system characteristics CQR to prevent a deadlock with the path verification worker. If a pathgroup is established on a device that is not in use, trigger path verification, since we may not have been informed about a working path.
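As a minimal sketch of this failfast pattern (example_set_characteristics() is a hypothetical stand-in; the real logic is in dasd_eckd_psf_ssc() and dasd_eckd_do_validate_server() in the diff below), the worker reschedules itself instead of blocking when the request fails fast:

	/*
	 * Sketch only (assumes <linux/workqueue.h> and the dasd driver types):
	 * a CQR flagged DASD_CQR_FLAGS_FAILFAST fails with -EAGAIN on a
	 * stopped device instead of waiting, so the worker can retry later
	 * rather than blocking the path verification worker.
	 */
	static void example_validate_worker(struct work_struct *work)
	{
		struct dasd_device *device =
			container_of(work, struct dasd_device, kick_validate);

		if (example_set_characteristics(device,
						DASD_CQR_FLAGS_FAILFAST) == -EAGAIN) {
			schedule_work(&device->kick_validate);	/* retry later */
			return;
		}
		dasd_put_device(device);
	}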
Signed-off-by: Stefan Haberland Reviewed-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 11 +++++++++++ drivers/s390/block/dasd_eckd.c | 32 +++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 6498d15f874..0595c763daf 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2157,6 +2157,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && (!dasd_eer_enabled(device))) { cqr->status = DASD_CQR_FAILED; + cqr->intrc = -EAGAIN; continue; } /* Don't try to start requests if device is stopped */ @@ -3270,6 +3271,16 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) dasd_schedule_device_bh(device); } if (path_event[chp] & PE_PATHGROUP_ESTABLISHED) { + if (!(device->path_data.opm & eventlpm) && + !(device->path_data.tbvpm & eventlpm)) { + /* + * we cannot establish a pathgroup on an + * unavailable path, so trigger a path + * verification first + */ + device->path_data.tbvpm |= eventlpm; + dasd_schedule_device_bh(device); + } DBF_DEV_EVENT(DBF_WARNING, device, "%s", "Pathgroup re-established\n"); if (device->discipline->kick_validate) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 2fb2b9ea97e..c48c72abbef 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1507,7 +1507,8 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device, * call might change behaviour of DASD devices. */ static int -dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav) +dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav, + unsigned long flags) { struct dasd_ccw_req *cqr; int rc; @@ -1516,10 +1517,19 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav) if (IS_ERR(cqr)) return PTR_ERR(cqr); + /* + * set flags, e.g. turn on failfast, to prevent blocking; + * the calling function should handle failed requests + */ + cqr->flags |= flags; + rc = dasd_sleep_on(cqr); if (!rc) /* trigger CIO to reprobe devices */ css_schedule_reprobe(); + else if (cqr->intrc == -EAGAIN) + rc = -EAGAIN; + dasd_sfree_request(cqr, cqr->memdev); return rc; } @@ -1527,7 +1537,8 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav) /* * Valide storage server of current device.
*/ -static void dasd_eckd_validate_server(struct dasd_device *device) +static int dasd_eckd_validate_server(struct dasd_device *device, + unsigned long flags) { int rc; struct dasd_eckd_private *private; @@ -1536,17 +1547,18 @@ private = (struct dasd_eckd_private *) device->private; if (private->uid.type == UA_BASE_PAV_ALIAS || private->uid.type == UA_HYPER_PAV_ALIAS) - return; + return 0; if (dasd_nopav || MACHINE_IS_VM) enable_pav = 0; else enable_pav = 1; - rc = dasd_eckd_psf_ssc(device, enable_pav); + rc = dasd_eckd_psf_ssc(device, enable_pav, flags); /* may be requested feature is not available on server, * therefore just report error and go ahead */ DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "PSF-SSC for SSID %04x " "returned rc=%d", private->uid.ssid, rc); + return rc; } /* @@ -1556,7 +1568,13 @@ static void dasd_eckd_do_validate_server(struct work_struct *work) { struct dasd_device *device = container_of(work, struct dasd_device, kick_validate); - dasd_eckd_validate_server(device); + if (dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST) + == -EAGAIN) { + /* schedule worker again if failed */ + schedule_work(&device->kick_validate); + return; + } + dasd_put_device(device); } @@ -1685,7 +1703,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (rc) goto out_err2; - dasd_eckd_validate_server(device); + dasd_eckd_validate_server(device, 0); /* device may report different configuration data after LCU setup */ rc = dasd_eckd_read_conf(device); @@ -4153,7 +4171,7 @@ static int dasd_eckd_restore_device(struct dasd_device *device) rc = dasd_alias_make_device_known_to_lcu(device); if (rc) return rc; - dasd_eckd_validate_server(device); + dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST); /* RE-Read Configuration Data */ rc = dasd_eckd_read_conf(device); From 03429f34c22cedc3300a81e13d5790450cc468af Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 11 Sep 2012 17:19:12 +0200 Subject: [PATCH 9/9] s390/dasd: fix read unit address configuration loop Read unit address configuration is done for all devices during online processing to read out LCU features. This is also done after a disconnect/connect of an LCU. Some older storage hardware does not provide the capability to read unit address configuration. This leads to a loop trying to read unit address configuration every 30 seconds. The device is still operational, but the logs are flooded with error messages. Fix the loop by recognizing a command reject saying that the suborder for ruac is not supported.
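The shape of the fix, as a simplified sketch (condensed from read_unit_address_configuration() and lcu_update_work() in the diff below; locking and the NEED_UAC_UPDATE handling are omitted, and example_ruac_update() is a hypothetical wrapper): a command reject meaning "suborder not supported" is turned into -EOPNOTSUPP, which ends the 30-second retry loop instead of rescheduling it.

	/* Sketch only: stop retrying when the hardware cannot do ruac at all */
	static void example_ruac_update(struct dasd_device *device,
					struct alias_lcu *lcu)
	{
		int rc = read_unit_address_configuration(device, lcu);

		if (rc == -EOPNOTSUPP)
			return;		/* final answer, do not retry */
		if (rc)
			schedule_delayed_work(&lcu->ruac_data.dwork, 30 * HZ);
	}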
Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_alias.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 157defe5e06..6b556995bb3 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -384,6 +384,29 @@ static void _remove_device_from_lcu(struct alias_lcu *lcu, group->next = NULL; }; +static int +suborder_not_supported(struct dasd_ccw_req *cqr) +{ + char *sense; + char reason; + char msg_format; + char msg_no; + + sense = dasd_get_sense(&cqr->irb); + if (!sense) + return 0; + + reason = sense[0]; + msg_format = (sense[7] & 0xF0); + msg_no = (sense[7] & 0x0F); + + /* command reject, Format 0 MSG 4 - invalid parameter */ + if ((reason == 0x80) && (msg_format == 0x00) && (msg_no == 0x04)) + return 1; + + return 0; +} + static int read_unit_address_configuration(struct dasd_device *device, struct alias_lcu *lcu) { @@ -435,6 +458,8 @@ static int read_unit_address_configuration(struct dasd_device *device, do { rc = dasd_sleep_on(cqr); + if (rc && suborder_not_supported(cqr)) + return -EOPNOTSUPP; } while (rc && (cqr->retries > 0)); if (rc) { spin_lock_irqsave(&lcu->lock, flags); @@ -521,7 +546,7 @@ static void lcu_update_work(struct work_struct *work) * processing the data */ spin_lock_irqsave(&lcu->lock, flags); - if (rc || (lcu->flags & NEED_UAC_UPDATE)) { + if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) { DBF_DEV_EVENT(DBF_WARNING, device, "could not update" " alias data in lcu (rc = %d), retry later", rc); schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ);
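For reference, a compact restatement of the sense check introduced by suborder_not_supported() above (byte layout as used by the code, not taken from hardware documentation; the helper name is hypothetical):

	/* sense[0] holds the reason; sense[7] holds message format (high
	 * nibble) and message number (low nibble) */
	static int example_is_ruac_not_supported(const unsigned char *sense)
	{
		return sense[0] == 0x80 &&		/* command reject */
		       (sense[7] & 0xf0) == 0x00 &&	/* message format 0 */
		       (sense[7] & 0x0f) == 0x04;	/* message 4: invalid parameter */
	}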