diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e161fafb495b746f94d84a172b27708a1ce77816..e7ff58150e8ff927d9741764e8dfcf98a1f715e8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -184,7 +184,7 @@ config SCHED_OMIT_FRAME_POINTER
 
 config PGTABLE_LEVELS
 	int
-	default 4
+	default 5
 
 source "init/Kconfig"
 
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 69b8a41fca84b4acf70b9b435ed5d430b43415af..624deaa44230c2bc55b63fe4d70757562dad8c3b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t;
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long p4d; } p4d_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef pte_t *pgtable_t;
 
@@ -82,12 +83,14 @@ typedef pte_t *pgtable_t;
 #define pte_val(x)	((x).pte)
 #define pmd_val(x)	((x).pmd)
 #define pud_val(x)	((x).pud)
+#define p4d_val(x)	((x).p4d)
 #define pgd_val(x)      ((x).pgd)
 
 #define __pgste(x)	((pgste_t) { (x) } )
 #define __pte(x)        ((pte_t) { (x) } )
 #define __pmd(x)        ((pmd_t) { (x) } )
 #define __pud(x)	((pud_t) { (x) } )
+#define __p4d(x)	((p4d_t) { (x) } )
 #define __pgd(x)        ((pgd_t) { (x) } )
 #define __pgprot(x)     ((pgprot_t) { (x) } )
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 166f703dad7c4f0459ddf60825373962189342da..bb0ff1bb0c4a824e22e9919832e89fa2cb377977 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 		return _SEGMENT_ENTRY_EMPTY;
 	if (mm->context.asce_limit <= (1UL << 42))
 		return _REGION3_ENTRY_EMPTY;
-	return _REGION2_ENTRY_EMPTY;
+	if (mm->context.asce_limit <= (1UL << 53))
+		return _REGION2_ENTRY_EMPTY;
+	return _REGION1_ENTRY_EMPTY;
 }
 
-int crst_table_upgrade(struct mm_struct *);
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
 void crst_table_downgrade(struct mm_struct *);
 
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	unsigned long *table = crst_table_alloc(mm);
+
+	if (table)
+		crst_table_init(table, _REGION2_ENTRY_EMPTY);
+	return (p4d_t *) table;
+}
+#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)
+
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	unsigned long *table = crst_table_alloc(mm);
@@ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	crst_table_free(mm, (unsigned long *) pmd);
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+	pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d);
+}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
-	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
+	p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
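
The populate helpers above just record, at each level, the physical origin of the
next-lower table plus the region-entry type bits. As a rough illustration only (a
toy user-space model, not kernel code: made-up table size, no s390 region bits,
error handling omitted), the five-level chain behaves like a radix tree of pointer
tables:

#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 8			/* tiny tables, just for the demo */

struct table { void *slot[ENTRIES]; };

static struct table *table_alloc(void)
{
	return calloc(1, sizeof(struct table));
}

int main(void)
{
	struct table *pgd = table_alloc();
	struct table *p4d = table_alloc();
	struct table *pud = table_alloc();
	struct table *pmd = table_alloc();
	struct table *walk;
	long page = 42;

	/* "populate": each entry records where the next-lower table lives */
	pgd->slot[1] = p4d;		/* pgd_populate */
	p4d->slot[2] = pud;		/* p4d_populate */
	pud->slot[3] = pmd;		/* pud_populate */
	pmd->slot[4] = &page;		/* leaf */

	/* a translation walks the chain, one index per level */
	walk = pgd->slot[1];
	walk = walk->slot[2];
	walk = walk->slot[3];
	printf("leaf value: %ld\n", *(long *)walk->slot[4]);
	return 0;
}
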
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index e6e3b887bee3d667615da98fb3a6820012d90df2..3effb26f0e1a5e16201496651881882320047207 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -24,7 +24,6 @@
  * the S390 page table tree.
  */
 #ifndef __ASSEMBLY__
-#include <asm-generic/5level-fixup.h>
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
@@ -87,12 +86,15 @@ extern unsigned long zero_page_mask;
  */
 #define PMD_SHIFT	20
 #define PUD_SHIFT	31
-#define PGDIR_SHIFT	42
+#define P4D_SHIFT	42
+#define PGDIR_SHIFT	53
 
 #define PMD_SIZE        (1UL << PMD_SHIFT)
 #define PMD_MASK        (~(PMD_SIZE-1))
 #define PUD_SIZE	(1UL << PUD_SHIFT)
 #define PUD_MASK	(~(PUD_SIZE-1))
+#define P4D_SIZE	(1UL << P4D_SHIFT)
+#define P4D_MASK	(~(P4D_SIZE-1))
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -105,6 +107,7 @@ extern unsigned long zero_page_mask;
 #define PTRS_PER_PTE	256
 #define PTRS_PER_PMD	2048
 #define PTRS_PER_PUD	2048
+#define PTRS_PER_P4D	2048
 #define PTRS_PER_PGD	2048
 
 #define FIRST_USER_ADDRESS  0UL
@@ -115,6 +118,8 @@ extern unsigned long zero_page_mask;
 	printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
 #define pud_ERROR(e) \
 	printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
+#define p4d_ERROR(e) \
+	printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
 
@@ -310,8 +315,8 @@ static inline int is_module_addr(void *addr)
 #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
 #endif
 
-#define _REGION_ENTRY_BITS	 0xfffffffffffff227UL
-#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL
+#define _REGION_ENTRY_BITS	 0xfffffffffffff22fUL
+#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
 
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
@@ -564,14 +569,14 @@ static inline void crdte(unsigned long old, unsigned long new,
  */
 static inline int pgd_present(pgd_t pgd)
 {
-	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
 		return 1;
 	return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
 }
 
 static inline int pgd_none(pgd_t pgd)
 {
-	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
 		return 0;
 	return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
 }
@@ -589,6 +594,28 @@ static inline int pgd_bad(pgd_t pgd)
 	return (pgd_val(pgd) & mask) != 0;
 }
 
+static inline int p4d_present(p4d_t p4d)
+{
+	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+		return 1;
+	return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+		return 0;
+	return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
+}
+
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _REGION_ENTRY_ORIGIN;
+	return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
+}
+
 static inline int pud_present(pud_t pud)
 {
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
@@ -641,6 +668,13 @@ static inline int pud_bad(pud_t pud)
 	return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
 }
 
+static inline int p4d_bad(p4d_t p4d)
+{
+	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+		return pud_bad(__pud(p4d_val(p4d)));
+	return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
+}
+
 static inline int pmd_present(pmd_t pmd)
 {
 	return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
@@ -794,8 +828,14 @@ static inline int pte_unused(pte_t pte)
 
 static inline void pgd_clear(pgd_t *pgd)
 {
-	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
-		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
+	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+		pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+		p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
 }
 
 static inline void pud_clear(pud_t *pud)
@@ -1089,6 +1129,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 }
 
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
@@ -1098,19 +1139,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 
 #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
 #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
+#define p4d_deref(p4d) (p4d_val(p4d) & _REGION_ENTRY_ORIGIN)
 #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
 
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
-	pud_t *pud = (pud_t *) pgd;
-	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
-		pud = (pud_t *) pgd_deref(*pgd);
-	return pud  + pud_index(address);
+	p4d_t *p4d = (p4d_t *) pgd;
+
+	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+		p4d = (p4d_t *) pgd_deref(*pgd);
+	return p4d + p4d_index(address);
+}
+
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+	pud_t *pud = (pud_t *) p4d;
+
+	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+		pud = (pud_t *) p4d_deref(*p4d);
+	return pud + pud_index(address);
 }
 
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 {
 	pmd_t *pmd = (pmd_t *) pud;
+
 	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
 		pmd = (pmd_t *) pud_deref(*pud);
 	return pmd + pmd_index(address);
@@ -1122,6 +1175,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 
 #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
 #define pud_page(pud) pfn_to_page(pud_pfn(pud))
+#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
 
 /* Find an entry in the lowest level page table.. */
 #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
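
With PGDIR_SHIFT moving from 42 to 53 and P4D_SHIFT taking over 42, a user address
now splits into 11 + 11 + 11 + 11 + 8 index bits plus the 12-bit page offset, which
covers the full 64-bit address (53 + 11 = 64). A stand-alone sketch of the index
arithmetic, user-space illustration only, with the constants copied from the hunk
above and an arbitrary sample address:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	20
#define PUD_SHIFT	31
#define P4D_SHIFT	42
#define PGDIR_SHIFT	53

#define PTRS_PER_PTE	256
#define PTRS_PER_PMD	2048
#define PTRS_PER_PUD	2048
#define PTRS_PER_P4D	2048
#define PTRS_PER_PGD	2048

int main(void)
{
	unsigned long addr = 0x0123456789abc000UL;

	printf("pgd %lu p4d %lu pud %lu pmd %lu pte %lu offset 0x%lx\n",
	       (addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1),
	       (addr >> P4D_SHIFT) & (PTRS_PER_P4D - 1),
	       (addr >> PUD_SHIFT) & (PTRS_PER_PUD - 1),
	       (addr >> PMD_SHIFT) & (PTRS_PER_PMD - 1),
	       (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1),
	       addr & 0xfffUL);
	return 0;
}
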
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc86438e55f49ddf853dca7b6f99582b3..f57c017a5c037eba9c8fead3b5c318cedbfef6a6 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -92,11 +92,11 @@ extern void execve_tail(void);
  */
 
 #define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \
-					(1UL << 31) : (1UL << 53))
+					(1UL << 31) : -PAGE_SIZE)
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
 					(1UL << 30) : (1UL << 41))
 #define TASK_SIZE		TASK_SIZE_OF(current)
-#define TASK_SIZE_MAX		(1UL << 53)
+#define TASK_SIZE_MAX		(-PAGE_SIZE)
 
 #define STACK_TOP		(test_thread_flag(TIF_31BIT) ? \
 					(1UL << 31) : (1UL << 42))
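
TASK_SIZE_MAX changes from the fixed 8PB ceiling (1UL << 53) to -PAGE_SIZE, which,
read as an unsigned long on a 64-bit build, is the highest page-aligned address.
A quick sketch of that identity (illustration only, assuming 64-bit unsigned long
and 4K pages):

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	printf("new TASK_SIZE_MAX = 0x%lx\n", (unsigned long)-PAGE_SIZE);
	printf("old TASK_SIZE_MAX = 0x%lx\n", 1UL << 53);
	return 0;
}
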
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 853b2a3d8deeeac08254be515502500a7842a3d9..7317b3108a88859a91523c45f1e52c08cb22fdc4 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,6 +136,21 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 	tlb_remove_table(tlb, pmd);
 }
 
+/*
+ * p4d_free_tlb frees a p4d table and clears the CRSTE for the
+ * region second table entry from the tlb.
+ * If the mm uses a four level page table the single p4d is freed
+ * as the pgd. p4d_free_tlb checks the asce_limit against 8PB
+ * to avoid the double free of the p4d in this case.
+ */
+static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+				unsigned long address)
+{
+	if (tlb->mm->context.asce_limit <= (1UL << 53))
+		return;
+	tlb_remove_table(tlb, p4d);
+}
+
 /*
  * pud_free_tlb frees a pud table and clears the CRSTE for the
  * region third table entry from the tlb.
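
The asce_limit check in p4d_free_tlb is the fold detection: at 8PB (1UL << 53) or
below the mm has at most four levels, p4d_offset() hands back the pgd page itself,
and freeing the "p4d" here would free the pgd a second time. A stand-alone sketch
of that guard (user-space illustration only; -4096UL stands in for -PAGE_SIZE on a
64-bit build):

#include <stdio.h>

int main(void)
{
	unsigned long limits[] = { 1UL << 42, 1UL << 53, -4096UL };
	int i;

	for (i = 0; i < 3; i++)
		printf("asce_limit 0x%016lx: %s\n", limits[i],
		       limits[i] <= (1UL << 53) ?
		       "p4d folded into pgd, skip the free" :
		       "separate p4d table, free it");
	return 0;
}
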
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 1b553d847140d4fc10ea7eee4a52757050df6fc2..049c3c455b32e7732420f748a8aa6a57704c8826 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
 }
 
 static void walk_pud_level(struct seq_file *m, struct pg_state *st,
-			   pgd_t *pgd, unsigned long addr)
+			   p4d_t *p4d, unsigned long addr)
 {
 	unsigned int prot;
 	pud_t *pud;
@@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
 
 	for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
 		st->current_address = addr;
-		pud = pud_offset(pgd, addr);
+		pud = pud_offset(p4d, addr);
 		if (!pud_none(*pud))
 			if (pud_large(*pud)) {
 				prot = pud_val(*pud) &
@@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
 	}
 }
 
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
+			   pgd_t *pgd, unsigned long addr)
+{
+	p4d_t *p4d;
+	int i;
+
+	for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
+		st->current_address = addr;
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d))
+			walk_pud_level(m, st, p4d, addr);
+		else
+			note_page(m, st, _PAGE_INVALID, 2);
+		addr += P4D_SIZE;
+	}
+}
+
 static void walk_pgd_level(struct seq_file *m)
 {
 	unsigned long addr = 0;
@@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m)
 		st.current_address = addr;
 		pgd = pgd_offset_k(addr);
 		if (!pgd_none(*pgd))
-			walk_pud_level(m, &st, pgd, addr);
+			walk_p4d_level(m, &st, pgd, addr);
 		else
 			note_page(m, &st, _PAGE_INVALID, 1);
 		addr += PGDIR_SIZE;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 7f6db1e6c048aa7c6a9e303e20d8f3fac2c4f55c..fbd664e4809830894033cc17fd7a49ebd8b52a04 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	unsigned long *table;
 	spinlock_t *ptl;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	int rc;
@@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	mm = gmap->mm;
 	pgd = pgd_offset(mm, vmaddr);
 	VM_BUG_ON(pgd_none(*pgd));
-	pud = pud_offset(pgd, vmaddr);
+	p4d = p4d_offset(pgd, vmaddr);
+	VM_BUG_ON(p4d_none(*p4d));
+	pud = pud_offset(p4d, vmaddr);
 	VM_BUG_ON(pud_none(*pud));
 	/* large puds cannot yet be handled */
 	if (pud_large(*pud))
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index b7b779c40a5bbffe9c0387ff6aa2378dc57580ae..8ecc25e760fa6dfba0f082cfd8dff38b8c33d9e9 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
 	return 1;
 }
 
-static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pud_t *pudp, pud;
 
-	pudp = (pud_t *) pgdp;
-	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
-		pudp = (pud_t *) pgd_deref(pgd);
+	pudp = (pud_t *) p4dp;
+	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+		pudp = (pud_t *) p4d_deref(p4d);
 	pudp += pud_index(addr);
 	do {
 		pud = *pudp;
@@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
 	return 1;
 }
 
+static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	p4d_t *p4dp, p4d;
+
+	p4dp = (p4d_t *) pgdp;
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+		p4dp = (p4d_t *) pgd_deref(pgd);
+	p4dp += p4d_index(addr);
+	do {
+		p4d = *p4dp;
+		barrier();
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(p4d))
+			return 0;
+		if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
+			return 0;
+	} while (p4dp++, addr = next, addr != end);
+
+	return 1;
+}
+
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
@@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none(pgd))
 			break;
-		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
+		if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
 			break;
 	} while (pgdp++, addr = next, addr != end);
 	local_irq_restore(flags);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 9b4050caa4e92f0465f61df52386a3ffc517ce3d..d3a5e39756f62549ce53b1a20c91df0cbc787be8 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -162,16 +162,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp = NULL;
 
 	pgdp = pgd_offset(mm, addr);
-	pudp = pud_alloc(mm, pgdp, addr);
-	if (pudp) {
-		if (sz == PUD_SIZE)
-			return (pte_t *) pudp;
-		else if (sz == PMD_SIZE)
-			pmdp = pmd_alloc(mm, pudp, addr);
+	p4dp = p4d_alloc(mm, pgdp, addr);
+	if (p4dp) {
+		pudp = pud_alloc(mm, p4dp, addr);
+		if (pudp) {
+			if (sz == PUD_SIZE)
+				return (pte_t *) pudp;
+			else if (sz == PMD_SIZE)
+				pmdp = pmd_alloc(mm, pudp, addr);
+		}
 	}
 	return (pte_t *) pmdp;
 }
@@ -179,16 +183,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
 	pmd_t *pmdp = NULL;
 
 	pgdp = pgd_offset(mm, addr);
 	if (pgd_present(*pgdp)) {
-		pudp = pud_offset(pgdp, addr);
-		if (pud_present(*pudp)) {
-			if (pud_large(*pudp))
-				return (pte_t *) pudp;
-			pmdp = pmd_offset(pudp, addr);
+		p4dp = p4d_offset(pgdp, addr);
+		if (p4d_present(*p4dp)) {
+			pudp = pud_offset(p4dp, addr);
+			if (pud_present(*pudp)) {
+				if (pud_large(*pudp))
+					return (pte_t *) pudp;
+				pmdp = pmd_offset(pudp, addr);
+			}
 		}
 	}
 	return (pte_t *) pmdp;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index b017daed6887847c8eb025e146ae22b37dedfb2c..8c5f284044efd2a9a1a10c8c1f3367ed0f83638d 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -120,7 +120,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 check_asce_limit:
 	if (addr + len > current->mm->context.asce_limit) {
-		rc = crst_table_upgrade(mm);
+		rc = crst_table_upgrade(mm, addr + len);
 		if (rc)
 			return (unsigned long) rc;
 	}
@@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 check_asce_limit:
 	if (addr + len > current->mm->context.asce_limit) {
-		rc = crst_table_upgrade(mm);
+		rc = crst_table_upgrade(mm, addr + len);
 		if (rc)
 			return (unsigned long) rc;
 	}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 49e721f3645e8ce143856ed82eb046fd34a9f235..1804815892465834c56b5a6bfcf90d3b86359b20 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -229,14 +229,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
 	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
 }
 
-static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
 			  unsigned long flags)
 {
 	unsigned long next;
 	pud_t *pudp;
 	int rc = 0;
 
-	pudp = pud_offset(pgd, addr);
+	pudp = pud_offset(p4d, addr);
 	do {
 		if (pud_none(*pudp))
 			return -EINVAL;
@@ -259,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return rc;
 }
 
+static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+			  unsigned long flags)
+{
+	unsigned long next;
+	p4d_t *p4dp;
+	int rc = 0;
+
+	p4dp = p4d_offset(pgd, addr);
+	do {
+		if (p4d_none(*p4dp))
+			return -EINVAL;
+		next = p4d_addr_end(addr, end);
+		rc = walk_pud_level(p4dp, addr, next, flags);
+		p4dp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end && !rc);
+	return rc;
+}
+
 static DEFINE_MUTEX(cpa_mutex);
 
 static int change_page_attr(unsigned long addr, unsigned long end,
@@ -278,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 		if (pgd_none(*pgdp))
 			break;
 		next = pgd_addr_end(addr, end);
-		rc = walk_pud_level(pgdp, addr, next, flags);
+		rc = walk_p4d_level(pgdp, addr, next, flags);
 		if (rc)
 			break;
 		cond_resched();
@@ -319,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 	unsigned long address;
 	int nr, i, j;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -326,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 	for (i = 0; i < numpages;) {
 		address = page_to_phys(page + i);
 		pgd = pgd_offset_k(address);
-		pud = pud_offset(pgd, address);
+		p4d = p4d_offset(pgd, address);
+		pud = pud_offset(p4d, address);
 		pmd = pmd_offset(pud, address);
 		pte = pte_offset_kernel(pmd, address);
 		nr = (unsigned long)pte >> ilog2(sizeof(long));
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index f502cbe657afb06712202216edd1914cf950a3aa..18918e394ce4b67614f9ac0e70a7a112f73fbc6a 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -76,29 +76,46 @@ static void __crst_table_upgrade(void *arg)
 	__tlb_flush_local();
 }
 
-int crst_table_upgrade(struct mm_struct *mm)
+int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 {
 	unsigned long *table, *pgd;
+	int rc, notify;
 
-	/* upgrade should only happen from 3 to 4 levels */
-	BUG_ON(mm->context.asce_limit != (1UL << 42));
-
-	table = crst_table_alloc(mm);
-	if (!table)
+	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
+	BUG_ON(mm->context.asce_limit < (1UL << 42));
+	if (end >= TASK_SIZE_MAX)
 		return -ENOMEM;
-
-	spin_lock_bh(&mm->page_table_lock);
-	pgd = (unsigned long *) mm->pgd;
-	crst_table_init(table, _REGION2_ENTRY_EMPTY);
-	pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
-	mm->pgd = (pgd_t *) table;
-	mm->context.asce_limit = 1UL << 53;
-	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-			   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-	spin_unlock_bh(&mm->page_table_lock);
-
-	on_each_cpu(__crst_table_upgrade, mm, 0);
-	return 0;
+	rc = 0;
+	notify = 0;
+	while (mm->context.asce_limit < end) {
+		table = crst_table_alloc(mm);
+		if (!table) {
+			rc = -ENOMEM;
+			break;
+		}
+		spin_lock_bh(&mm->page_table_lock);
+		pgd = (unsigned long *) mm->pgd;
+		if (mm->context.asce_limit == (1UL << 42)) {
+			crst_table_init(table, _REGION2_ENTRY_EMPTY);
+			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
+			mm->pgd = (pgd_t *) table;
+			mm->context.asce_limit = 1UL << 53;
+			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		} else {
+			crst_table_init(table, _REGION1_ENTRY_EMPTY);
+			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
+			mm->pgd = (pgd_t *) table;
+			mm->context.asce_limit = -PAGE_SIZE;
+			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+		}
+		notify = 1;
+		spin_unlock_bh(&mm->page_table_lock);
+	}
+	if (notify)
+		on_each_cpu(__crst_table_upgrade, mm, 0);
+	return rc;
 }
 
 void crst_table_downgrade(struct mm_struct *mm)
@@ -274,7 +291,7 @@ static void __tlb_remove_table(void *_table)
 	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 
 	switch (mask) {
-	case 0:		/* pmd or pud */
+	case 0:		/* pmd, pud, or p4d */
 		free_pages((unsigned long) table, 2);
 		break;
 	case 1:		/* lower 2K of a 4K page table */
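
crst_table_upgrade now raises the address-space limit one level per loop iteration,
so a request that outgrows a three-level table steps 4TB -> 8PB -> -PAGE_SIZE, i.e.
a 3-to-5-level upgrade simply takes two passes. A stand-alone model of that
progression (user-space arithmetic only; no locking, ASCE rebuild, or table
allocation, and the mapping end is a hypothetical value):

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long asce_limit = 1UL << 42;	/* start with 3 levels */
	unsigned long end = 1UL << 60;		/* hypothetical mapping end */

	while (asce_limit < end) {
		asce_limit = (asce_limit == (1UL << 42)) ?
			     (1UL << 53) : -PAGE_SIZE;
		printf("upgraded asce_limit to 0x%lx\n", asce_limit);
	}
	return 0;
}
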
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 947b66a5cdba730573a4f0f4bd299a6d7b7b52bd..d4d409ba206b2e0f4ed0b88cc4f3a3ba125b3597 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -610,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 {
 	spinlock_t *ptl;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pgste_t pgste;
@@ -618,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 	bool dirty;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return false;
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return false;
 	pmd = pmd_alloc(mm, pud, addr);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index c33c94b4be6036e8558e677c78136e3300c48ae3..d8398962a7236e2264a0424b0bc4a4d4befd9493 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order)
 	return (void *) memblock_alloc(size, size);
 }
 
+static inline p4d_t *vmem_p4d_alloc(void)
+{
+	p4d_t *p4d = NULL;
+
+	p4d = vmem_alloc_pages(2);
+	if (!p4d)
+		return NULL;
+	clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
+	return p4d;
+}
+
 static inline pud_t *vmem_pud_alloc(void)
 {
 	pud_t *pud = NULL;
@@ -85,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 	unsigned long end = start + size;
 	unsigned long address = start;
 	pgd_t *pg_dir;
+	p4d_t *p4_dir;
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
@@ -102,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 	while (address < end) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
+			p4_dir = vmem_p4d_alloc();
+			if (!p4_dir)
+				goto out;
+			pgd_populate(&init_mm, pg_dir, p4_dir);
+		}
+		p4_dir = p4d_offset(pg_dir, address);
+		if (p4d_none(*p4_dir)) {
 			pu_dir = vmem_pud_alloc();
 			if (!pu_dir)
 				goto out;
-			pgd_populate(&init_mm, pg_dir, pu_dir);
+			p4d_populate(&init_mm, p4_dir, pu_dir);
 		}
-		pu_dir = pud_offset(pg_dir, address);
+		pu_dir = pud_offset(p4_dir, address);
 		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
 		     !debug_pagealloc_enabled()) {
@@ -161,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 	unsigned long end = start + size;
 	unsigned long address = start;
 	pgd_t *pg_dir;
+	p4d_t *p4_dir;
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
@@ -172,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 			address += PGDIR_SIZE;
 			continue;
 		}
-		pu_dir = pud_offset(pg_dir, address);
+		p4_dir = p4d_offset(pg_dir, address);
+		if (p4d_none(*p4_dir)) {
+			address += P4D_SIZE;
+			continue;
+		}
+		pu_dir = pud_offset(p4_dir, address);
 		if (pud_none(*pu_dir)) {
 			address += PUD_SIZE;
 			continue;
@@ -213,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	unsigned long pgt_prot, sgt_prot;
 	unsigned long address = start;
 	pgd_t *pg_dir;
+	p4d_t *p4_dir;
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
@@ -227,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	for (address = start; address < end;) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
+			p4_dir = vmem_p4d_alloc();
+			if (!p4_dir)
+				goto out;
+			pgd_populate(&init_mm, pg_dir, p4_dir);
+		}
+
+		p4_dir = p4d_offset(pg_dir, address);
+		if (p4d_none(*p4_dir)) {
 			pu_dir = vmem_pud_alloc();
 			if (!pu_dir)
 				goto out;
-			pgd_populate(&init_mm, pg_dir, pu_dir);
+			p4d_populate(&init_mm, p4_dir, pu_dir);
 		}
 
-		pu_dir = pud_offset(pg_dir, address);
+		pu_dir = pud_offset(p4_dir, address);
 		if (pud_none(*pu_dir)) {
 			pm_dir = vmem_pmd_alloc();
 			if (!pm_dir)