diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 041b2227967d04bbddff05b1cfbd3a90b5bfc27b..bdc9eebd1d442fb3afdc0a7cce575d749285aad6 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -284,35 +284,39 @@ label##_hv:								\
 	rlwimi	r11,r12,0,MSR_EE;		\
 	mtmsrd	r11,1
 
-#define STD_EXCEPTION_COMMON(trap, label, hdlr)		\
-	.align	7;					\
-	.globl label##_common;				\
-label##_common:						\
-	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
-	DISABLE_INTS;					\
-	bl	.save_nvgprs;				\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
-	bl	hdlr;					\
-	b	.ret_from_except
+#define ADD_NVGPRS				\
+	bl	.save_nvgprs
+
+#define RUNLATCH_ON				\
+BEGIN_FTR_SECTION				\
+	clrrdi	r3,r1,THREAD_SHIFT;		\
+	ld	r4,TI_LOCAL_FLAGS(r3);		\
+	andi.	r0,r4,_TLF_RUNLATCH;		\
+	beql	ppc64_runlatch_on_trampoline;	\
+END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
+
+#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions)	\
+	.align	7;						\
+	.globl label##_common;					\
+label##_common:							\
+	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);		\
+	additions;						\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	bl	hdlr;						\
+	b	ret
+
+#define STD_EXCEPTION_COMMON(trap, label, hdlr)			\
+	EXCEPTION_COMMON(trap, label, hdlr, .ret_from_except,	\
+			 ADD_NVGPRS;DISABLE_INTS)
 
 /*
  * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
  * in the idle task and therefore need the special idle handling
  * (finish nap and runlatch)
  */
-#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr)	\
-	.align	7;					\
-	.globl label##_common;				\
-label##_common:						\
-	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
-	FINISH_NAP;					\
-	DISABLE_INTS;					\
-BEGIN_FTR_SECTION					\
-	bl	.ppc64_runlatch_on;			\
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)			\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
-	bl	hdlr;					\
-	b	.ret_from_except_lite
+#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr)		  \
+	EXCEPTION_COMMON(trap, label, hdlr, .ret_from_except_lite, \
+			 FINISH_NAP;RUNLATCH_ON;DISABLE_INTS)
 
 /*
  * When the idle code in power4_idle puts the CPU into NAP mode,
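
NOTE: the rework above collapses two near-identical macro bodies into a
single EXCEPTION_COMMON, passing the return path and the per-flavour
"additions" as arguments. RUNLATCH_ON only takes the branch when the
latch is actually off. A rough C rendering of that assembly, as a
sketch only (not part of the patch):

	/* thread_info sits at the base of the kernel stack; the asm
	 * derives it by clearing the low THREAD_SHIFT bits of r1,
	 * which is what current_thread_info() does in C. */
	if (cpu_has_feature(CPU_FTR_CTRL)) {		/* FTR section */
		struct thread_info *ti = current_thread_info();
		if (!(ti->local_flags & _TLF_RUNLATCH))	/* andi. */
			__ppc64_runlatch_on();	/* beql via trampoline */
	}
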
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7fdc2c0b7fa057bd1f9391e93d0332b9f8ccebb8..b1a215eabef6abc872ae94107a1be3af3c2bdad6 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1079,30 +1079,12 @@
 
 #define proc_trap()	asm volatile("trap")
 
-#ifdef CONFIG_PPC64
-
-extern void ppc64_runlatch_on(void);
-extern void __ppc64_runlatch_off(void);
-
-#define ppc64_runlatch_off()					\
-	do {							\
-		if (cpu_has_feature(CPU_FTR_CTRL) &&		\
-		    test_thread_flag(TIF_RUNLATCH))		\
-			__ppc64_runlatch_off();			\
-	} while (0)
+#define __get_SP()	({unsigned long sp; \
+			asm volatile("mr %0,1": "=r" (sp)); sp;})
 
 extern unsigned long scom970_read(unsigned int address);
 extern void scom970_write(unsigned int address, unsigned long value);
 
-#else
-#define ppc64_runlatch_on()
-#define ppc64_runlatch_off()
-
-#endif /* CONFIG_PPC64 */
-
-#define __get_SP()	({unsigned long sp; \
-			asm volatile("mr %0,1": "=r" (sp)); sp;})
-
 struct pt_regs;
 
 extern void ppc_save_regs(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index c377457d1b898a13ce2ba24b6e0aab108069f91f..a02883d5af435f798bc0b174a6432134c084060c 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -550,5 +550,43 @@ extern void reloc_got2(unsigned long);
 
 extern struct dentry *powerpc_debugfs_root;
 
+#ifdef CONFIG_PPC64
+
+extern void __ppc64_runlatch_on(void);
+extern void __ppc64_runlatch_off(void);
+
+/*
+ * We manually hard enable/disable here: these helpers are
+ * called from the idle loop, and they must not disturb the
+ * soft-disable/enable state or interrupt replay.
+ */
+#define ppc64_runlatch_off()					\
+	do {							\
+		if (cpu_has_feature(CPU_FTR_CTRL) &&		\
+		    test_thread_local_flags(_TLF_RUNLATCH)) {	\
+			unsigned long msr = mfmsr();		\
+			__hard_irq_disable();			\
+			__ppc64_runlatch_off();			\
+			if (msr & MSR_EE)			\
+				__hard_irq_enable();		\
+		}      						\
+	} while (0)
+
+#define ppc64_runlatch_on()					\
+	do {							\
+		if (cpu_has_feature(CPU_FTR_CTRL) &&		\
+		    !test_thread_local_flags(_TLF_RUNLATCH)) {	\
+			unsigned long msr = mfmsr();		\
+			__hard_irq_disable();			\
+			__ppc64_runlatch_on();			\
+			if (msr & MSR_EE)			\
+				__hard_irq_enable();		\
+		}      						\
+	} while (0)
+#else
+#define ppc64_runlatch_on()
+#define ppc64_runlatch_off()
+#endif /* CONFIG_PPC64 */
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
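
NOTE: the mfmsr()/__hard_irq_disable()/__hard_irq_enable() bracketing in
the two macros above exists because they run from the idle loop, below
the lazy-interrupt layer, so they cannot rely on the soft-disable
machinery. A hypothetical caller, for illustration only (the function
names here are stand-ins, not from this patch):

	static void example_idle_loop(void)
	{
		ppc64_runlatch_off();	/* core may drop dispatch rate */
		while (!need_resched())
			enter_nap();	/* stand-in for real nap entry */
		ppc64_runlatch_on();	/* restore full dispatch rate */
	}
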
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 964714940961a729c9b8d7404abb133dedb94c28..4a741c7efd02743a33d53bd6c56b24c8e4075596 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -110,7 +110,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_NOERROR		12	/* Force successful syscall return */
 #define TIF_NOTIFY_RESUME	13	/* callback before returning to user */
 #define TIF_SYSCALL_TRACEPOINT	15	/* syscall tracepoint instrumentation */
-#define TIF_RUNLATCH		16	/* Is the runlatch enabled? */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -141,11 +140,13 @@ static inline struct thread_info *current_thread_info(void)
 #define TLF_SLEEPING		1	/* suspend code enabled SLEEP mode */
 #define TLF_RESTORE_SIGMASK	2	/* Restore signal mask in do_signal */
 #define TLF_LAZY_MMU		3	/* tlb_batch is active */
+#define TLF_RUNLATCH		4	/* Is the runlatch enabled? */
 
 #define _TLF_NAPPING		(1 << TLF_NAPPING)
 #define _TLF_SLEEPING		(1 << TLF_SLEEPING)
 #define _TLF_RESTORE_SIGMASK	(1 << TLF_RESTORE_SIGMASK)
 #define _TLF_LAZY_MMU		(1 << TLF_LAZY_MMU)
+#define _TLF_RUNLATCH		(1 << TLF_RUNLATCH)
 
 #ifndef __ASSEMBLY__
 #define HAVE_SET_RESTORE_SIGMASK	1
@@ -156,6 +157,12 @@ static inline void set_restore_sigmask(void)
 	set_bit(TIF_SIGPENDING, &ti->flags);
 }
 
+static inline bool test_thread_local_flags(unsigned int flags)
+{
+	struct thread_info *ti = current_thread_info();
+	return (ti->local_flags & flags) != 0;
+}
+
 #ifdef CONFIG_PPC64
 #define is_32bit_task()	(test_thread_flag(TIF_32BIT))
 #else
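
NOTE: moving the flag from ti->flags (TIF_*) to ti->local_flags (TLF_*)
is what allows process.c below to drop the atomic set_bit/clear_bit:
local_flags is only ever written by the owning thread, with hard IRQs
off on these paths. A hypothetical setter, shown for contrast (the
patch itself only adds the test helper):

	/* safe without atomics: no other CPU writes local_flags */
	static inline void set_thread_local_flags(unsigned int flags)
	{
		current_thread_info()->local_flags |= flags;
	}
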
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2240d4ecec028ab7375e150b56771a7dbe4e2ca9..3af80e82830bc82f6562dc169001f9a059e3c3a5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -483,6 +483,9 @@ machine_check_common:
 system_call_entry:
 	b	system_call_common
 
+ppc64_runlatch_on_trampoline:
+	b	.__ppc64_runlatch_on
+
 /*
  * Here we have detected that the kernel stack pointer is bad.
  * R9 contains the saved CR, r13 points to the paca,
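
NOTE: the trampoline is needed because the beql in RUNLATCH_ON is a
conditional branch with only +/-32KB of reach, which cannot span from
the exception text to .__ppc64_runlatch_on, while the unconditional b
reaches +/-32MB. Since beql sets LR at the original call site and the
trampoline uses a plain b, the blr in __ppc64_runlatch_on returns
straight to the exception path. In C terms the thunk is roughly (an
analogy, not part of the patch):

	/* near, reachable thunk that tail-calls the far target */
	void ppc64_runlatch_on_trampoline(void)
	{
		__ppc64_runlatch_on();
	}
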
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d817ab018486d71b8c06bb9b6275bd0f533d8c89..bf80a1d5f8fe8a56b5fdf1883293b79da97aa002 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1220,34 +1220,32 @@ void dump_stack(void)
 EXPORT_SYMBOL(dump_stack);
 
 #ifdef CONFIG_PPC64
-void ppc64_runlatch_on(void)
+/* Called with hard IRQs off */
+void __ppc64_runlatch_on(void)
 {
+	struct thread_info *ti = current_thread_info();
 	unsigned long ctrl;
 
-	if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) {
-		HMT_medium();
-
-		ctrl = mfspr(SPRN_CTRLF);
-		ctrl |= CTRL_RUNLATCH;
-		mtspr(SPRN_CTRLT, ctrl);
+	ctrl = mfspr(SPRN_CTRLF);
+	ctrl |= CTRL_RUNLATCH;
+	mtspr(SPRN_CTRLT, ctrl);
 
-		set_thread_flag(TIF_RUNLATCH);
-	}
+	ti->local_flags |= _TLF_RUNLATCH;
 }
 
+/* Called with hard IRQs off */
 void __ppc64_runlatch_off(void)
 {
+	struct thread_info *ti = current_thread_info();
 	unsigned long ctrl;
 
-	HMT_medium();
-
-	clear_thread_flag(TIF_RUNLATCH);
+	ti->local_flags &= ~_TLF_RUNLATCH;
 
 	ctrl = mfspr(SPRN_CTRLF);
 	ctrl &= ~CTRL_RUNLATCH;
 	mtspr(SPRN_CTRLT, ctrl);
 }
-#endif
+#endif /* CONFIG_PPC64 */
 
 #if THREAD_SHIFT < PAGE_SHIFT
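
NOTE: local_flags stores masks, so the stores above must use
_TLF_RUNLATCH (the mask) and not TLF_RUNLATCH (the bit number). The
distinction fails silently when mixed up, as these values from the
thread_info.h hunk show:

	#define TLF_RESTORE_SIGMASK	2
	#define TLF_RUNLATCH		4
	#define _TLF_RESTORE_SIGMASK	(1 << TLF_RESTORE_SIGMASK)	/* 0x04 */
	#define _TLF_RUNLATCH		(1 << TLF_RUNLATCH)		/* 0x10 */

	/* local_flags |= TLF_RUNLATCH would set 0x04, silently setting
	 * _TLF_RESTORE_SIGMASK instead of the runlatch bit. */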