diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 1592a4264488e2c0646c5500fc6e4b8c44d5247a..e049723840d3298a42f6b3c39b9ac558fee1857d 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -10,7 +10,7 @@ typedef struct {
 #else
 	int		switch_pending;
 #endif
-	unsigned int	vmalloc_seq;
+	atomic_t	vmalloc_seq;
 	unsigned long	sigpage;
 #ifdef CONFIG_VDSO
 	unsigned long	vdso;
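
The hunk above converts the per-mm generation counter from a plain unsigned
int into an atomic_t, so the later hunks can update and read it through the
kernel's atomic accessors (atomic_inc_return_release(), atomic_read(),
atomic_set_release()) rather than through racy plain loads and stores. As a
rough userspace analogy only, here is a minimal sketch using C11
<stdatomic.h> as a stand-in for the kernel atomic_t API; all names are
illustrative, not taken from the patch:

    /* Sketch only: C11 stand-in for the kernel's atomic_t counter. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int vmalloc_seq;      /* was: unsigned int vmalloc_seq; */

    static void bump_seq(void)
    {
            /* atomic RMW with release ordering,
             * cf. atomic_inc_return_release() */
            atomic_fetch_add_explicit(&vmalloc_seq, 1, memory_order_release);
    }

    int main(void)
    {
            bump_seq();
            printf("seq = %d\n",
                   atomic_load_explicit(&vmalloc_seq, memory_order_relaxed));
            return 0;
    }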
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 84e58956fcab9ac32c8562bf1e3837b92d3efedd..db2cb06aa8cf5e9d5253a33b84d31b1a8a6ab828 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -23,6 +23,16 @@
 
 void __check_vmalloc_seq(struct mm_struct *mm);
 
+#ifdef CONFIG_MMU
+static inline void check_vmalloc_seq(struct mm_struct *mm)
+{
+	if (!IS_ENABLED(CONFIG_ARM_LPAE) &&
+	    unlikely(atomic_read(&mm->context.vmalloc_seq) !=
+		     atomic_read(&init_mm.context.vmalloc_seq)))
+		__check_vmalloc_seq(mm);
+}
+#endif
+
 #ifdef CONFIG_CPU_HAS_ASID
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
@@ -52,8 +62,7 @@ static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
 static inline void check_and_switch_context(struct mm_struct *mm,
 					    struct task_struct *tsk)
 {
-	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
-		__check_vmalloc_seq(mm);
+	check_vmalloc_seq(mm);
 
 	if (irqs_disabled())
 		/*
@@ -129,6 +138,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 }
 
+#ifdef CONFIG_VMAP_STACK
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+	if (mm != &init_mm)
+		check_vmalloc_seq(mm);
+}
+#define enter_lazy_tlb enter_lazy_tlb
+#endif
+
 #include <asm-generic/mmu_context.h>
 
 #endif
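
Two things are factored out above. First, the new check_vmalloc_seq() helper
centralizes the "local counter differs from init_mm's counter, so resync"
fast path that was previously open-coded at each call site; the
!IS_ENABLED(CONFIG_ARM_LPAE) guard lets the compiler discard it on LPAE
builds, consistent with the traps.c hunk below, which only bumps the counter
on non-LPAE configurations. Second, "#define enter_lazy_tlb enter_lazy_tlb"
uses the usual self-#define convention so that asm-generic/mmu_context.h,
included just below, does not emit its empty stub. A standalone sketch of
the fast-path/slow-path split, using C11 atomics with illustrative names
(the real slow path is __check_vmalloc_seq(), shown in the final hunk):

    /* Sketch only: relaxed fast-path check, synchronizing slow path. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct ctx { atomic_int vmalloc_seq; };

    static struct ctx init_ctx;         /* stands in for init_mm.context */

    static void resync(struct ctx *c)
    {
            /* copy the shared tables here, then publish; see final hunk */
            atomic_store_explicit(&c->vmalloc_seq,
                                  atomic_load_explicit(&init_ctx.vmalloc_seq,
                                                       memory_order_acquire),
                                  memory_order_release);
    }

    static inline void check_seq(struct ctx *c)
    {
            /* relaxed is enough here: a mismatch only diverts us to the
             * slow path, which orders its own loads and stores */
            if (atomic_load_explicit(&c->vmalloc_seq,
                                     memory_order_relaxed) !=
                atomic_load_explicit(&init_ctx.vmalloc_seq,
                                     memory_order_relaxed))
                    resync(c);
    }

    int main(void)
    {
            struct ctx task_ctx = { 0 };

            atomic_fetch_add_explicit(&init_ctx.vmalloc_seq, 1,
                                      memory_order_release);
            check_seq(&task_ctx);
            printf("resynced to %d\n",
                   atomic_load_explicit(&task_ctx.vmalloc_seq,
                                        memory_order_relaxed));
            return 0;
    }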
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 7b871ed99ccf0a63a5b883913997abf78adcff91..5fcc8a600e36d4c62ad53faabc7c7d3df4d92527 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -147,11 +147,10 @@ extern void copy_page(void *to, const void *from);
 #include <asm/pgtable-3level-types.h>
 #else
 #include <asm/pgtable-2level-types.h>
-#endif
-
 #ifdef CONFIG_VMAP_STACK
 #define ARCH_PAGE_TABLE_SYNC_MASK	PGTBL_PMD_MODIFIED
 #endif
+#endif
 
 #endif /* CONFIG_MMU */
 
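Note that the page.h hunk is a pure move rather than new logic: relocating
the closing #endif places the ARCH_PAGE_TABLE_SYNC_MASK definition inside
the #else (two-level, non-LPAE) branch, so the core vmalloc code only
requests arch_sync_kernel_mappings() notifications on configurations that
actually need them, mirroring the #ifndef CONFIG_ARM_LPAE added in traps.c
below. Reconstructed from the hunk, the region reads as follows after the
patch:

    #ifdef CONFIG_ARM_LPAE
    #include <asm/pgtable-3level-types.h>
    #else
    #include <asm/pgtable-2level-types.h>
    #ifdef CONFIG_VMAP_STACK
    #define ARCH_PAGE_TABLE_SYNC_MASK	PGTBL_PMD_MODIFIED
    #endif
    #endif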
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3f38357efc4627c585c8a14f52db40a85387be46..08612032aefee7249ad000caa2dc9ef4041d5a13 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -885,6 +885,7 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 	die("kernel stack overflow", regs, 0);
 }
 
+#ifndef CONFIG_ARM_LPAE
 /*
  * Normally, we rely on the logic in do_translation_fault() to update stale PMD
  * entries covering the vmalloc space in a task's page tables when it first
@@ -895,26 +896,14 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
  * So we need to ensure that these PMD entries are up to date *before* the MM
  * switch. As we already have some logic in the MM switch path that takes care
  * of this, let's trigger it by bumping the counter every time the core vmalloc
- * code modifies a PMD entry in the vmalloc region.
+ * code modifies a PMD entry in the vmalloc region. Use release semantics on
+ * the store so that other CPUs observing the counter's new value are
+ * guaranteed to see the updated page table entries as well.
  */
 void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
-	if (start > VMALLOC_END || end < VMALLOC_START)
-		return;
-
-	/*
-	 * This hooks into the core vmalloc code to receive notifications of
-	 * any PMD level changes that have been made to the kernel page tables.
-	 * This means it should only be triggered once for every MiB worth of
-	 * vmalloc space, given that we don't support huge vmalloc/vmap on ARM,
-	 * and that kernel PMD level table entries are rarely (if ever)
-	 * updated.
-	 *
-	 * This means that the counter is going to max out at ~250 for the
-	 * typical case. If it overflows, something entirely unexpected has
-	 * occurred so let's throw a warning if that happens.
-	 */
-	WARN_ON(++init_mm.context.vmalloc_seq == UINT_MAX);
+	if (start < VMALLOC_END && end > VMALLOC_START)
+		atomic_inc_return_release(&init_mm.context.vmalloc_seq);
 }
-
+#endif
 #endif
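
Besides rewriting the range check in its positive form and dropping the old
WARN_ON-on-overflow bookkeeping, the traps.c hunk bumps the counter with
atomic_inc_return_release(), so the preceding PMD update is ordered before
the new counter value becomes visible; a plain atomic_inc() would provide no
such ordering. The release/acquire publish pattern the new comment
describes, reduced to a standalone C11 sketch with illustrative names:

    /* Sketch only: the release increment orders the table update before
     * the counter bump, so an acquire reader that sees the new count is
     * guaranteed to see the new entry as well. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int seq;
    static int pmd_entry;               /* stands in for a PMD slot */

    static void publish(int val)
    {
            pmd_entry = val;            /* the page-table modification */
            atomic_fetch_add_explicit(&seq, 1, memory_order_release);
    }

    static int observe(void)
    {
            while (atomic_load_explicit(&seq, memory_order_acquire) == 0)
                    ;                   /* wait for the writer */
            return pmd_entry;           /* ordered after the acquire load */
    }

    int main(void)
    {
            publish(42);
            printf("entry = %d\n", observe());
            return 0;
    }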
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 48091870db89e420da8a9fa193420a9ac2e991f0..4204ffa2d104f144fc06e326d8005b83caaf09a2 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -240,8 +240,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 	unsigned int cpu = smp_processor_id();
 	u64 asid;
 
-	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
-		__check_vmalloc_seq(mm);
+	check_vmalloc_seq(mm);
 
 	/*
 	 * We cannot update the pgd and the ASID atomically with classic
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 6e830b9418c94c7595c51f4cc4abb951d7c3ed29..8963c8c6347175295e9b78b51524ca7079a04ef8 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -117,16 +117,21 @@ EXPORT_SYMBOL(ioremap_page);
 
 void __check_vmalloc_seq(struct mm_struct *mm)
 {
-	unsigned int seq;
+	int seq;
 
 	do {
-		seq = init_mm.context.vmalloc_seq;
+		seq = atomic_read(&init_mm.context.vmalloc_seq);
 		memcpy(pgd_offset(mm, VMALLOC_START),
 		       pgd_offset_k(VMALLOC_START),
 		       sizeof(pgd_t) * (pgd_index(VMALLOC_END) -
 					pgd_index(VMALLOC_START)));
-		mm->context.vmalloc_seq = seq;
-	} while (seq != init_mm.context.vmalloc_seq);
+		/*
+		 * Use a store-release so that other CPUs that observe the
+		 * counter's new value are guaranteed to see the results of the
+		 * memcpy as well.
+		 */
+		atomic_set_release(&mm->context.vmalloc_seq, seq);
+	} while (seq != atomic_read(&init_mm.context.vmalloc_seq));
 }
 
 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
@@ -157,7 +162,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 			 * Note: this is still racy on SMP machines.
 			 */
 			pmd_clear(pmdp);
-			init_mm.context.vmalloc_seq++;
+			atomic_inc_return_release(&init_mm.context.vmalloc_seq);
 
 			/*
 			 * Free the page table, if there was one.
@@ -174,8 +179,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 	 * Ensure that the active_mm is up to date - we want to
 	 * catch any use-after-iounmap cases.
 	 */
-	if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)
-		__check_vmalloc_seq(current->active_mm);
+	check_vmalloc_seq(current->active_mm);
 
 	flush_tlb_kernel_range(virt, end);
 }
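
The rewritten __check_vmalloc_seq() above is the consumer side of the
scheme: sample the global counter, copy the PGD slice covering the vmalloc
area, release-store the sampled value into the local mm, and retry if the
global counter moved while the copy was in flight. The
unmap_area_sections() increment likewise gains release semantics, so the
cleared PMD is published before the bumped count. The same "copy until the
sequence is stable" shape, reduced to a standalone C11 toy with illustrative
names and a flat array in place of page tables:

    /* Sketch only: seqlock-like "copy until stable" resync loop. */
    #include <stdatomic.h>
    #include <string.h>
    #include <stdio.h>

    #define NENT 4

    static atomic_int global_seq;       /* cf. init_mm.context.vmalloc_seq */
    static int global_tbl[NENT];        /* cf. init_mm's PGD entries */

    struct mm { atomic_int seq; int tbl[NENT]; };

    static void resync(struct mm *mm)
    {
            int seq;

            do {
                    seq = atomic_load_explicit(&global_seq,
                                               memory_order_acquire);
                    memcpy(mm->tbl, global_tbl, sizeof(global_tbl));
                    /* release: publish the copy before the local counter */
                    atomic_store_explicit(&mm->seq, seq,
                                          memory_order_release);
            } while (seq != atomic_load_explicit(&global_seq,
                                                 memory_order_relaxed));
    }

    int main(void)
    {
            struct mm mm = { 0 };

            global_tbl[0] = 42;
            atomic_fetch_add_explicit(&global_seq, 1, memory_order_release);
            resync(&mm);
            printf("tbl[0] = %d, seq = %d\n", mm.tbl[0],
                   atomic_load_explicit(&mm.seq, memory_order_relaxed));
            return 0;
    }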