diff --git a/.mailmap b/.mailmap
index 632700cee55cdaeaea8d6765037150f493271c8f..b1ab0129c7d6b034b1e41355892afdebf60ebdb8 100644
--- a/.mailmap
+++ b/.mailmap
@@ -55,6 +55,8 @@ Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
 Ben Gardner <bgardner@wabtec.com>
 Ben M Cahill <ben.m.cahill@intel.com>
 BjÃ¶rn Steinbrink <B.Steinbrink@gmx.de>
+BjÃ¶rn TÃ¶pel <bjorn@kernel.org> <bjorn.topel@gmail.com>
+BjÃ¶rn TÃ¶pel <bjorn@kernel.org> <bjorn.topel@intel.com>
 Boris Brezillon <bbrezillon@kernel.org> <b.brezillon.dev@gmail.com>
 Boris Brezillon <bbrezillon@kernel.org> <b.brezillon@overkiz.com>
 Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9e3cdb271d06cfc198e80824ecfd83b305ae326c..a10b545c2070a54455c3e7181cc43135dc68f611 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5972,6 +5972,10 @@
 			This option is obsoleted by the "nopv" option, which
 			has equivalent effect for XEN platform.
 
+	xen_no_vector_callback
+			[KNL,X86,XEN] Disable the vector callback for Xen
+			event channel interrupts.
+
 	xen_scrub_pages=	[XEN]
 			Boolean option to control scrubbing pages before giving them back
 			to Xen, for use by other domains. Can be also changed at runtime
diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst
index e588bccf5158370fb03dfe4db06b20ee93114108..c042176e17078f5a72eba00b501d665475566c76 100644
--- a/Documentation/firmware-guide/acpi/apei/einj.rst
+++ b/Documentation/firmware-guide/acpi/apei/einj.rst
@@ -50,8 +50,8 @@ The following files belong to it:
   0x00000010        Memory Uncorrectable non-fatal
   0x00000020        Memory Uncorrectable fatal
   0x00000040        PCI Express Correctable
-  0x00000080        PCI Express Uncorrectable fatal
-  0x00000100        PCI Express Uncorrectable non-fatal
+  0x00000080        PCI Express Uncorrectable non-fatal
+  0x00000100        PCI Express Uncorrectable fatal
   0x00000200        Platform Correctable
   0x00000400        Platform Uncorrectable non-fatal
   0x00000800        Platform Uncorrectable fatal
diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index 9af3334d9ad080c1aaef9dca67f709a2fa8ef343..5f0dea3d571e359a2655defdcf9b7383b9801ba8 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -534,3 +534,6 @@ offload. Hence, TLS TX device feature flag requires TX csum offload being set.
 Disabling the latter implies clearing the former. Disabling TX checksum offload
 should not affect old connections, and drivers should make sure checksum
 calculation does not break for them.
+Similarly, device-offloaded TLS decryption implies doing RXCSUM. If the user
+does not want to enable RX csum offload, TLS RX device feature is disabled
+as well.
diff --git a/MAINTAINERS b/MAINTAINERS
index 096b584e7fedebf2f581f0f78eafd6cfe00f56f3..1df56a32d2df9570c070696ed22973d134000dde 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -906,7 +906,7 @@ AMD KFD
 M:	Felix Kuehling <Felix.Kuehling@amd.com>
 L:	amd-gfx@lists.freedesktop.org
 S:	Supported
-T:	git git://people.freedesktop.org/~agd5f/linux
+T:	git https://gitlab.freedesktop.org/agd5f/linux.git
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd*.[ch]
 F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
@@ -3334,7 +3334,7 @@ F:	arch/riscv/net/
 X:	arch/riscv/net/bpf_jit_comp64.c
 
 BPF JIT for RISC-V (64-bit)
-M:	BjÃ¶rn TÃ¶pel <bjorn.topel@gmail.com>
+M:	BjÃ¶rn TÃ¶pel <bjorn@kernel.org>
 L:	netdev@vger.kernel.org
 L:	bpf@vger.kernel.org
 S:	Maintained
@@ -14827,7 +14827,7 @@ M:	Alex Deucher <alexander.deucher@amd.com>
 M:	Christian KÃ¶nig <christian.koenig@amd.com>
 L:	amd-gfx@lists.freedesktop.org
 S:	Supported
-T:	git git://people.freedesktop.org/~agd5f/linux
+T:	git https://gitlab.freedesktop.org/agd5f/linux.git
 F:	drivers/gpu/drm/amd/
 F:	drivers/gpu/drm/radeon/
 F:	include/uapi/drm/amdgpu_drm.h
@@ -16328,6 +16328,7 @@ M:	Pekka Enberg <penberg@kernel.org>
 M:	David Rientjes <rientjes@google.com>
 M:	Joonsoo Kim <iamjoonsoo.kim@lge.com>
 M:	Andrew Morton <akpm@linux-foundation.org>
+M:	Vlastimil Babka <vbabka@suse.cz>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	include/linux/sl?b*.h
@@ -19430,7 +19431,7 @@ F:	drivers/net/ethernet/*/*/*xdp*
 K:	(?:\b|_)xdp(?:\b|_)
 
 XDP SOCKETS (AF_XDP)
-M:	BjÃ¶rn TÃ¶pel <bjorn.topel@intel.com>
+M:	BjÃ¶rn TÃ¶pel <bjorn@kernel.org>
 M:	Magnus Karlsson <magnus.karlsson@intel.com>
 R:	Jonathan Lemon <jonathan.lemon@gmail.com>
 L:	netdev@vger.kernel.org
diff --git a/Makefile b/Makefile
index 9e73f82e0d86360250203b9f51b2fcdbee564ba0..b0e4767735dca21d441c0ae373c53cf9a09ecf11 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 60e901cd0de6a31af593f4100c21ee2e3edc3def..5a957a9a09843c1031b7a9a3a46d5a454c417867 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -371,7 +371,7 @@ static int __init xen_guest_init(void)
 	}
 	gnttab_init();
 	if (!xen_initial_domain())
-		xenbus_probe(NULL);
+		xenbus_probe();
 
 	/*
 	 * Making sure board specific code will not set up ops for
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 05e17351e4f334d765744af5172050c843bda79e..f39568b28ec1c47a5abbe2db43be5529111291fa 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -174,8 +174,6 @@ config ARM64
 	select HAVE_NMI
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS
-	select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS
-	select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 015ddffaf6caa3213813fcdcbb5601b1a3ec95d8..b56a4b2bc24864081198bc104ab6f26d6855201c 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -17,7 +17,7 @@
 #include <asm/lse.h>
 
 #define ATOMIC_OP(op)							\
-static inline void arch_##op(int i, atomic_t *v)			\
+static __always_inline void arch_##op(int i, atomic_t *v)		\
 {									\
 	__lse_ll_sc_body(op, i, v);					\
 }
@@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub)
 #undef ATOMIC_OP
 
 #define ATOMIC_FETCH_OP(name, op)					\
-static inline int arch_##op##name(int i, atomic_t *v)			\
+static __always_inline int arch_##op##name(int i, atomic_t *v)		\
 {									\
 	return __lse_ll_sc_body(op##name, i, v);			\
 }
@@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return)
 #undef ATOMIC_FETCH_OPS
 
 #define ATOMIC64_OP(op)							\
-static inline void arch_##op(long i, atomic64_t *v)			\
+static __always_inline void arch_##op(long i, atomic64_t *v)		\
 {									\
 	__lse_ll_sc_body(op, i, v);					\
 }
@@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub)
 #undef ATOMIC64_OP
 
 #define ATOMIC64_FETCH_OP(name, op)					\
-static inline long arch_##op##name(long i, atomic64_t *v)		\
+static __always_inline long arch_##op##name(long i, atomic64_t *v)	\
 {									\
 	return __lse_ll_sc_body(op##name, i, v);			\
 }
@@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return)
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_FETCH_OPS
 
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	return __lse_ll_sc_body(atomic64_dec_if_positive, v);
 }
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 69ad25fbeae4af047de3f697b0958559b6e7ecf1..ca2cd75d32863c8301129700e0c6ab6f67183add 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -94,8 +94,7 @@
 #endif /* CONFIG_ARM64_FORCE_52BIT */
 
 extern phys_addr_t arm64_dma_phys_limit;
-extern phys_addr_t arm64_dma32_phys_limit;
-#define ARCH_LOW_ADDRESS_LIMIT	((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1)
+#define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)
 
 struct debug_info {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f42fd9e339815f7b97525cd30b1ebe61692393c0..3017844635874838c968780c48a1c4f22312435b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -75,7 +75,7 @@ int main(void)
   DEFINE(S_SDEI_TTBR1,		offsetof(struct pt_regs, sdei_ttbr1));
   DEFINE(S_PMR_SAVE,		offsetof(struct pt_regs, pmr_save));
   DEFINE(S_STACKFRAME,		offsetof(struct pt_regs, stackframe));
-  DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
+  DEFINE(PT_REGS_SIZE,		sizeof(struct pt_regs));
   BLANK();
 #ifdef CONFIG_COMPAT
   DEFINE(COMPAT_SIGFRAME_REGS_OFFSET,		offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index a338f40e64d393b2f78d5b01b4bda24930e0db87..b3e4f9a088b1a76118205f0065fda2cc6859aa2f 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -35,7 +35,7 @@
  */
 	.macro  ftrace_regs_entry, allregs=0
 	/* Make room for pt_regs, plus a callee frame */
-	sub	sp, sp, #(S_FRAME_SIZE + 16)
+	sub	sp, sp, #(PT_REGS_SIZE + 16)
 
 	/* Save function arguments (and x9 for simplicity) */
 	stp	x0, x1, [sp, #S_X0]
@@ -61,15 +61,15 @@
 	.endif
 
 	/* Save the callsite's SP and LR */
-	add	x10, sp, #(S_FRAME_SIZE + 16)
+	add	x10, sp, #(PT_REGS_SIZE + 16)
 	stp	x9, x10, [sp, #S_LR]
 
 	/* Save the PC after the ftrace callsite */
 	str	x30, [sp, #S_PC]
 
 	/* Create a frame record for the callsite above pt_regs */
-	stp	x29, x9, [sp, #S_FRAME_SIZE]
-	add	x29, sp, #S_FRAME_SIZE
+	stp	x29, x9, [sp, #PT_REGS_SIZE]
+	add	x29, sp, #PT_REGS_SIZE
 
 	/* Create our frame record within pt_regs. */
 	stp	x29, x30, [sp, #S_STACKFRAME]
@@ -120,7 +120,7 @@ ftrace_common_return:
 	ldr	x9, [sp, #S_PC]
 
 	/* Restore the callsite's SP */
-	add	sp, sp, #S_FRAME_SIZE + 16
+	add	sp, sp, #PT_REGS_SIZE + 16
 
 	ret	x9
 SYM_CODE_END(ftrace_common)
@@ -130,7 +130,7 @@ SYM_CODE_START(ftrace_graph_caller)
 	ldr	x0, [sp, #S_PC]
 	sub	x0, x0, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
 	add	x1, sp, #S_LR			// parent_ip (callsite's LR)
-	ldr	x2, [sp, #S_FRAME_SIZE]	   	// parent fp (callsite's FP)
+	ldr	x2, [sp, #PT_REGS_SIZE]	   	// parent fp (callsite's FP)
 	bl	prepare_ftrace_return
 	b	ftrace_common_return
 SYM_CODE_END(ftrace_graph_caller)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a8c3e7aaca749427ffd0a6aeb45f88dc3d23138a..c9bae73f2621a345d9b590cf896a2e8ecc5d47c4 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -75,7 +75,7 @@ alternative_else_nop_endif
 	.endif
 #endif
 
-	sub	sp, sp, #S_FRAME_SIZE
+	sub	sp, sp, #PT_REGS_SIZE
 #ifdef CONFIG_VMAP_STACK
 	/*
 	 * Test whether the SP has overflowed, without corrupting a GPR.
@@ -96,7 +96,7 @@ alternative_else_nop_endif
 	 * userspace, and can clobber EL0 registers to free up GPRs.
 	 */
 
-	/* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
+	/* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
 	msr	tpidr_el0, x0
 
 	/* Recover the original x0 value and stash it in tpidrro_el0 */
@@ -253,7 +253,7 @@ alternative_else_nop_endif
 
 	scs_load tsk, x20
 	.else
-	add	x21, sp, #S_FRAME_SIZE
+	add	x21, sp, #PT_REGS_SIZE
 	get_current_task tsk
 	.endif /* \el == 0 */
 	mrs	x22, elr_el1
@@ -377,7 +377,7 @@ alternative_else_nop_endif
 	ldp	x26, x27, [sp, #16 * 13]
 	ldp	x28, x29, [sp, #16 * 14]
 	ldr	lr, [sp, #S_LR]
-	add	sp, sp, #S_FRAME_SIZE		// restore sp
+	add	sp, sp, #PT_REGS_SIZE		// restore sp
 
 	.if	\el == 0
 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
@@ -580,12 +580,12 @@ __bad_stack:
 
 	/*
 	 * Store the original GPRs to the new stack. The orginal SP (minus
-	 * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
+	 * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry.
 	 */
-	sub	sp, sp, #S_FRAME_SIZE
+	sub	sp, sp, #PT_REGS_SIZE
 	kernel_entry 1
 	mrs	x0, tpidr_el0
-	add	x0, x0, #S_FRAME_SIZE
+	add	x0, x0, #PT_REGS_SIZE
 	str	x0, [sp, #S_SP]
 
 	/* Stash the regs for handle_bad_stack */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 38bb07eff8720d238e1d03759f59fb76e8f5ee05..3605f77ad4df1796c52b01a2bb959569d5ea1a83 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -23,8 +23,6 @@
 #include <linux/platform_device.h>
 #include <linux/sched_clock.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
-#include <linux/cpufreq.h>
 
 /* ARMv8 Cortex-A53 specific event types. */
 #define ARMV8_A53_PERFCTR_PREF_LINEFILL				0xC2
@@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = {
 
 static int __init armv8_pmu_driver_init(void)
 {
-	int ret;
-
 	if (acpi_disabled)
-		ret = platform_driver_register(&armv8_pmu_driver);
+		return platform_driver_register(&armv8_pmu_driver);
 	else
-		ret = arm_pmu_acpi_probe(armv8_pmuv3_init);
-
-	/*
-	 * Try to re-initialize lockup detector after PMU init in
-	 * case PMU events are triggered via NMIs.
-	 */
-	if (ret == 0 && arm_pmu_irq_is_nmi())
-		lockup_detector_init();
-
-	return ret;
+		return arm_pmu_acpi_probe(armv8_pmuv3_init);
 }
 device_initcall(armv8_pmu_driver_init)
 
@@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event,
 	userpg->cap_user_time_zero = 1;
 	userpg->cap_user_time_short = 1;
 }
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
-/*
- * Safe maximum CPU frequency in case a particular platform doesn't implement
- * cpufreq driver. Although, architecture doesn't put any restrictions on
- * maximum frequency but 5 GHz seems to be safe maximum given the available
- * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
- * hand, we can't make it much higher as it would lead to a large hard-lockup
- * detection timeout on parts which are running slower (eg. 1GHz on
- * Developerbox) and doesn't possess a cpufreq driver.
- */
-#define SAFE_MAX_CPU_FREQ	5000000000UL // 5 GHz
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
-	unsigned int cpu = smp_processor_id();
-	unsigned long max_cpu_freq;
-
-	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
-	if (!max_cpu_freq)
-		max_cpu_freq = SAFE_MAX_CPU_FREQ;
-
-	return (u64)max_cpu_freq * watchdog_thresh;
-}
-#endif
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 890ca72c5a5148a662c52de78f42c81c88abf420..288a84e253ccbecc227a8356e20819a1099266bd 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -25,7 +25,7 @@
 	stp x24, x25, [sp, #S_X24]
 	stp x26, x27, [sp, #S_X26]
 	stp x28, x29, [sp, #S_X28]
-	add x0, sp, #S_FRAME_SIZE
+	add x0, sp, #PT_REGS_SIZE
 	stp lr, x0, [sp, #S_LR]
 	/*
 	 * Construct a useful saved PSTATE
@@ -62,7 +62,7 @@
 	.endm
 
 SYM_CODE_START(kretprobe_trampoline)
-	sub sp, sp, #S_FRAME_SIZE
+	sub sp, sp, #PT_REGS_SIZE
 
 	save_all_base_regs
 
@@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline)
 
 	restore_all_base_regs
 
-	add sp, sp, #S_FRAME_SIZE
+	add sp, sp, #PT_REGS_SIZE
 	ret
 
 SYM_CODE_END(kretprobe_trampoline)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f71d6ce4673f53e5e935514791a52d6149b3b9e5..6237486ff6bb73db074d6e275c7d739548a1a7d0 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs)
 asmlinkage void do_notify_resume(struct pt_regs *regs,
 				 unsigned long thread_flags)
 {
-	/*
-	 * The assembly code enters us with IRQs off, but it hasn't
-	 * informed the tracing code of that for efficiency reasons.
-	 * Update the trace code with the current status.
-	 */
-	trace_hardirqs_off();
-
 	do {
 		if (thread_flags & _TIF_NEED_RESCHED) {
 			/* Unmask Debug and SError for the next task */
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index f61e9d8cc55a1efe2cda0bb94f67177bb79acef6..c2877c332f2dc636a403193bff46e2df84001df5 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -9,6 +9,7 @@
 
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
+#include <asm/exception.h>
 #include <asm/fpsimd.h>
 #include <asm/syscall.h>
 #include <asm/thread_info.h>
@@ -165,15 +166,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
 		local_daif_mask();
 		flags = current_thread_info()->flags;
-		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) {
-			/*
-			 * We're off to userspace, where interrupts are
-			 * always enabled after we restore the flags from
-			 * the SPSR.
-			 */
-			trace_hardirqs_on();
+		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
 			return;
-		}
 		local_daif_restore(DAIF_PROCCTX);
 	}
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7deddf56f7c3ef3a17d1b87604088c2ed30922ba..709d98fea90cc1f8b08c85864fbaa6fd6a0b7d42 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1;
 EXPORT_SYMBOL(memstart_addr);
 
 /*
- * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
- * memory as some devices, namely the Raspberry Pi 4, have peripherals with
- * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
- * bit addressable memory area.
+ * If the corresponding config options are enabled, we create both ZONE_DMA
+ * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory
+ * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
+ * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
+ * otherwise it is empty.
  */
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
-phys_addr_t arm64_dma32_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
 /*
@@ -84,7 +84,7 @@ static void __init reserve_crashkernel(void)
 
 	if (crash_base == 0) {
 		/* Current arm64 boot protocol requires 2MB alignment */
-		crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
+		crash_base = memblock_find_in_range(0, arm64_dma_phys_limit,
 				crash_size, SZ_2M);
 		if (crash_base == 0) {
 			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -196,6 +196,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
 	unsigned int __maybe_unused acpi_zone_dma_bits;
 	unsigned int __maybe_unused dt_zone_dma_bits;
+	phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
 
 #ifdef CONFIG_ZONE_DMA
 	acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
@@ -205,8 +206,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
 #ifdef CONFIG_ZONE_DMA32
-	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
+	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
+	if (!arm64_dma_phys_limit)
+		arm64_dma_phys_limit = dma32_phys_limit;
 #endif
+	if (!arm64_dma_phys_limit)
+		arm64_dma_phys_limit = PHYS_MASK + 1;
 	max_zone_pfns[ZONE_NORMAL] = max;
 
 	free_area_init(max_zone_pfns);
@@ -394,16 +399,9 @@ void __init arm64_memblock_init(void)
 
 	early_init_fdt_scan_reserved_mem();
 
-	if (IS_ENABLED(CONFIG_ZONE_DMA32))
-		arm64_dma32_phys_limit = max_zone_phys(32);
-	else
-		arm64_dma32_phys_limit = PHYS_MASK + 1;
-
 	reserve_elfcorehdr();
 
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
-
-	dma_contiguous_reserve(arm64_dma32_phys_limit);
 }
 
 void __init bootmem_init(void)
@@ -438,6 +436,11 @@ void __init bootmem_init(void)
 	sparse_init();
 	zone_sizes_init(min, max);
 
+	/*
+	 * Reserve the CMA area after arm64_dma_phys_limit was initialised.
+	 */
+	dma_contiguous_reserve(arm64_dma_phys_limit);
+
 	/*
 	 * request_standard_resources() depends on crashkernel's memory being
 	 * reserved, so do it here.
@@ -455,7 +458,7 @@ void __init bootmem_init(void)
 void __init mem_init(void)
 {
 	if (swiotlb_force == SWIOTLB_FORCE ||
-	    max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
+	    max_pfn > PFN_DOWN(arm64_dma_phys_limit))
 		swiotlb_init(1);
 	else
 		swiotlb_force = SWIOTLB_NO_FORCE;
diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h
index dd8c166ffd7b5ffbf56e3217a209512745d7e65b..42ed5248fae9876875a71f5b66c40a0a884c8749 100644
--- a/arch/ia64/include/asm/sparsemem.h
+++ b/arch/ia64/include/asm/sparsemem.h
@@ -3,6 +3,7 @@
 #define _ASM_IA64_SPARSEMEM_H
 
 #ifdef CONFIG_SPARSEMEM
+#include <asm/page.h>
 /*
  * SECTION_SIZE_BITS            2^N: how big each section will be
  * MAX_PHYSMEM_BITS             2^N: how much memory we can have in that space
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index c61c641674e6b4765495cf4cc9751af7b54d539d..e3946b06e840a6b2e6c5a297e81f16db50a4c914 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -13,6 +13,7 @@
 #include <linux/libfdt.h>
 
 #include <asm/addrspace.h>
+#include <asm/unaligned.h>
 
 /*
  * These two variables specify the free mem region
@@ -117,7 +118,7 @@ void decompress_kernel(unsigned long boot_heap_start)
 		dtb_size = fdt_totalsize((void *)&__appended_dtb);
 
 		/* last four bytes is always image size in little endian */
-		image_size = le32_to_cpup((void *)&__image_end - 4);
+		image_size = get_unaligned_le32((void *)&__image_end - 4);
 
 		/* copy dtb to where the booted kernel will expect it */
 		memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size,
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index bd47e15d02c73ea599e896a197500896ee7c174a..be5d4afcd30f930e6eb828d55a05099be8402a5d 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1444,7 +1444,7 @@ static void octeon_irq_setup_secondary_ciu2(void)
 static int __init octeon_irq_init_ciu(
 	struct device_node *ciu_node, struct device_node *parent)
 {
-	unsigned int i, r;
+	int i, r;
 	struct irq_chip *chip;
 	struct irq_chip *chip_edge;
 	struct irq_chip *chip_mbox;
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 6ee3f7218c675b6c605d06776ef1164ef6d13441..c4441416e96b6a28c91e884a9594f4b3dbb5efd9 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -103,4 +103,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 #undef ns_to_kernel_old_timeval
 #define ns_to_kernel_old_timeval ns_to_old_timeval32
 
+/*
+ * Some data types as stored in coredump.
+ */
+#define user_long_t             compat_long_t
+#define user_siginfo_t          compat_siginfo_t
+#define copy_siginfo_to_external        copy_siginfo_to_external32
+
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index 6dd103d3cebba2f299fa60489b65c9377f975ee4..7b2a23f48c1ac227f6bad0eb4eadb068c28017ea 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -106,4 +106,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 #undef ns_to_kernel_old_timeval
 #define ns_to_kernel_old_timeval ns_to_old_timeval32
 
+/*
+ * Some data types as stored in coredump.
+ */
+#define user_long_t             compat_long_t
+#define user_siginfo_t          compat_siginfo_t
+#define copy_siginfo_to_external        copy_siginfo_to_external32
+
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c
index 47aeb3350a76029edaf96a03d29d8ce975058232..0e365b7c742d93b0a0c1ce06fdbcb0aa8b180dff 100644
--- a/arch/mips/kernel/relocate.c
+++ b/arch/mips/kernel/relocate.c
@@ -187,8 +187,14 @@ static int __init relocate_exception_table(long offset)
 static inline __init unsigned long rotate_xor(unsigned long hash,
 					      const void *area, size_t size)
 {
-	size_t i;
-	unsigned long *ptr = (unsigned long *)area;
+	const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash));
+	size_t diff, i;
+
+	diff = (void *)ptr - area;
+	if (unlikely(size < diff + sizeof(hash)))
+		return hash;
+
+	size = ALIGN_DOWN(size - diff, sizeof(hash));
 
 	for (i = 0; i < size / sizeof(hash); i++) {
 		/* Rotate by odd number of bits and XOR. */
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index 81671aa365b34d39dde25f31b69f19d211458354..77c635c2c90d43f2979006e280ba8e1071ea5b41 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -103,6 +103,8 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz
 	return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz);
 }
 
+#ifdef __powerpc64__
+
 static __always_inline
 int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 {
@@ -115,10 +117,22 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 	return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
 }
 
-#ifdef CONFIG_VDSO32
+#else
 
 #define BUILD_VDSO32		1
 
+static __always_inline
+int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+	return do_syscall_2(__NR_clock_gettime64, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+	return do_syscall_2(__NR_clock_getres_time64, _clkid, (unsigned long)_ts);
+}
+
 static __always_inline
 int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
 {
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 8e0b1298bf19bb9c7d56447bb02eb31dec6d0dce..4ab426b8b0e02276423241b25d2386eedc24b90e 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -187,6 +187,12 @@ SECTIONS
 	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
 		_sinittext = .;
 		INIT_TEXT
+
+		/*
+		 *.init.text might be RO so we must ensure this section ends on
+		 * a page boundary.
+		 */
+		. = ALIGN(PAGE_SIZE);
 		_einittext = .;
 #ifdef CONFIG_PPC64
 		*(.tramp.ftrace.init);
@@ -200,6 +206,8 @@ SECTIONS
 		EXIT_TEXT
 	}
 
+	. = ALIGN(PAGE_SIZE);
+
 	INIT_DATA_SECTION(16)
 
 	. = ALIGN(8);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 81b76d44725d7c50d3cdf2f406705b03ba5d5b56..e9e2c1f0a690594389a2d034a56f59f8a376a89f 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -137,7 +137,7 @@ config PA_BITS
 
 config PAGE_OFFSET
 	hex
-	default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
+	default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
 	default 0x80000000 if 64BIT && !MMU
 	default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
 	default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
@@ -247,10 +247,12 @@ config MODULE_SECTIONS
 
 choice
 	prompt "Maximum Physical Memory"
-	default MAXPHYSMEM_2GB if 32BIT
+	default MAXPHYSMEM_1GB if 32BIT
 	default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
 	default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
 
+	config MAXPHYSMEM_1GB
+		bool "1GiB"
 	config MAXPHYSMEM_2GB
 		bool "2GiB"
 	config MAXPHYSMEM_128GB
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 4a2729f5ca3f0113be3e02e72f9c427fe02207cc..24d75a146e02d441c167b2b5028e3303eb80f1e3 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -88,7 +88,9 @@ &eth0 {
 	phy-mode = "gmii";
 	phy-handle = <&phy0>;
 	phy0: ethernet-phy@0 {
+		compatible = "ethernet-phy-id0007.0771";
 		reg = <0>;
+		reset-gpios = <&gpio 12 GPIO_ACTIVE_LOW>;
 	};
 };
 
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index d222d353d86d40823e249bfdfa80e4a8bd3f4045..8c3d1e4517031ac56b07c1c1c393b0d7b64cee79 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -64,6 +64,8 @@ CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
 CONFIG_SPI=y
 CONFIG_SPI_SIFIVE=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SIFIVE=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_POWER_RESET=y
 CONFIG_DRM=y
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 41a72861987ccc9a0406b4926bd21e8e2c728ebb..251e1db088fa2d8508762bcc5e459f2e7b120684 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -99,7 +99,6 @@
 				| _PAGE_DIRTY)
 
 #define PAGE_KERNEL		__pgprot(_PAGE_KERNEL)
-#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL | _PAGE_EXEC)
 #define PAGE_KERNEL_READ	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL | _PAGE_EXEC)
 #define PAGE_KERNEL_READ_EXEC	__pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 8454f746bbfd0f3ed6712537847deb332d0e32d0..1453a2f563bcc8180101763585e776c0abe5e426 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -10,7 +10,7 @@
 
 #include <linux/types.h>
 
-#ifndef GENERIC_TIME_VSYSCALL
+#ifndef CONFIG_GENERIC_TIME_VSYSCALL
 struct vdso_data {
 };
 #endif
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index de59dd457b41584083dee7866e189bbb396945dd..d8678135704425973c3ba11d058dd56babef2481 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -26,7 +26,16 @@ cache_get_priv_group(struct cacheinfo *this_leaf)
 
 static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type)
 {
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id());
+	/*
+	 * Using raw_smp_processor_id() elides a preemptability check, but this
+	 * is really indicative of a larger problem: the cacheinfo UABI assumes
+	 * that cores have a homonogenous view of the cache hierarchy.  That
+	 * happens to be the case for the current set of RISC-V systems, but
+	 * likely won't be true in general.  Since there's no way to provide
+	 * correct information for these systems via the current UABI we're
+	 * just eliding the check for now.
+	 */
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id());
 	struct cacheinfo *this_leaf;
 	int index;
 
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 524d918f3601b2f73b8d4cf88a1936ad507b5f15..744f3209c48d0b30d0e67f7b00f136b75d5e0fd0 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -124,15 +124,15 @@ skip_context_tracking:
 	REG_L a1, (a1)
 	jr a1
 1:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	call trace_hardirqs_on
-#endif
 	/*
 	 * Exceptions run with interrupts enabled or disabled depending on the
 	 * state of SR_PIE in m/sstatus.
 	 */
 	andi t0, s1, SR_PIE
 	beqz t0, 1f
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call trace_hardirqs_on
+#endif
 	csrs CSR_STATUS, SR_IE
 
 1:
@@ -155,6 +155,15 @@ skip_context_tracking:
 	tail do_trap_unknown
 
 handle_syscall:
+#ifdef CONFIG_RISCV_M_MODE
+	/*
+	 * When running is M-Mode (no MMU config), MPIE does not get set.
+	 * As a result, we need to force enable interrupts here because
+	 * handle_exception did not do set SR_IE as it always sees SR_PIE
+	 * being cleared.
+	 */
+	csrs CSR_STATUS, SR_IE
+#endif
 #if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING)
 	/* Recover a0 - a7 for system calls */
 	REG_L a0, PT_A0(sp)
@@ -186,14 +195,7 @@ check_syscall_nr:
 	 * Syscall number held in a7.
 	 * If syscall number is above allowed value, redirect to ni_syscall.
 	 */
-	bge a7, t0, 1f
-	/*
-	 * Check if syscall is rejected by tracer, i.e., a7 == -1.
-	 * If yes, we pretend it was executed.
-	 */
-	li t1, -1
-	beq a7, t1, ret_from_syscall_rejected
-	blt a7, t1, 1f
+	bgeu a7, t0, 1f
 	/* Call syscall */
 	la s0, sys_call_table
 	slli t0, a7, RISCV_LGPTR
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 1d85e9bf783cf907ba1e0ef17a0d529c1f9f5cb6..3fa3f26dde85664a6ccd993d9ee3bdcdf118eee3 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -127,7 +127,9 @@ static void __init init_resources(void)
 {
 	struct memblock_region *region = NULL;
 	struct resource *res = NULL;
-	int ret = 0;
+	struct resource *mem_res = NULL;
+	size_t mem_res_sz = 0;
+	int ret = 0, i = 0;
 
 	code_res.start = __pa_symbol(_text);
 	code_res.end = __pa_symbol(_etext) - 1;
@@ -145,16 +147,17 @@ static void __init init_resources(void)
 	bss_res.end = __pa_symbol(__bss_stop) - 1;
 	bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
+	mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res);
+	mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
+	if (!mem_res)
+		panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
 	/*
 	 * Start by adding the reserved regions, if they overlap
 	 * with /memory regions, insert_resource later on will take
 	 * care of it.
 	 */
 	for_each_reserved_mem_region(region) {
-		res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
-		if (!res)
-			panic("%s: Failed to allocate %zu bytes\n", __func__,
-			      sizeof(struct resource));
+		res = &mem_res[i++];
 
 		res->name = "Reserved";
 		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
@@ -171,8 +174,10 @@ static void __init init_resources(void)
 		 * Ignore any other reserved regions within
 		 * system memory.
 		 */
-		if (memblock_is_memory(res->start))
+		if (memblock_is_memory(res->start)) {
+			memblock_free((phys_addr_t) res, sizeof(struct resource));
 			continue;
+		}
 
 		ret = add_resource(&iomem_resource, res);
 		if (ret < 0)
@@ -181,10 +186,7 @@ static void __init init_resources(void)
 
 	/* Add /memory regions to the resource tree */
 	for_each_mem_region(region) {
-		res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
-		if (!res)
-			panic("%s: Failed to allocate %zu bytes\n", __func__,
-			      sizeof(struct resource));
+		res = &mem_res[i++];
 
 		if (unlikely(memblock_is_nomap(region))) {
 			res->name = "Reserved";
@@ -205,9 +207,9 @@ static void __init init_resources(void)
 	return;
 
  error:
-	memblock_free((phys_addr_t) res, sizeof(struct resource));
 	/* Better an empty resource tree than an inconsistent one */
 	release_child_resources(&iomem_resource);
+	memblock_free((phys_addr_t) mem_res, mem_res_sz);
 }
 
 
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 48b870a685b3092f863474539ffd7ecfd00a0ffe..df5d2da7c40be3fa9ec41e464911277b98321db3 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -14,7 +14,7 @@
 
 #include <asm/stacktrace.h>
 
-register unsigned long sp_in_global __asm__("sp");
+register const unsigned long sp_in_global __asm__("sp");
 
 #ifdef CONFIG_FRAME_POINTER
 
@@ -28,9 +28,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 		sp = user_stack_pointer(regs);
 		pc = instruction_pointer(regs);
 	} else if (task == NULL || task == current) {
-		const register unsigned long current_sp = sp_in_global;
 		fp = (unsigned long)__builtin_frame_address(0);
-		sp = current_sp;
+		sp = sp_in_global;
 		pc = (unsigned long)walk_stackframe;
 	} else {
 		/* task blocked in __switch_to */
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 4d3a1048ad8b1cbc690c75321933e9057a2b69b6..8a5cf99c07762403ebd55ba6f54e03005e28b7e4 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2017 SiFive
  */
 
+#include <linux/of_clk.h>
 #include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <asm/sbi.h>
@@ -24,6 +25,8 @@ void __init time_init(void)
 	riscv_timebase = prop;
 
 	lpj_fine = riscv_timebase / HZ;
+
+	of_clk_init(NULL);
 	timer_probe();
 }
 
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 678204231700cad475be9334ab77cd46cefc6fdb..3f1d35e7c98a62d9b3113999e45938e2b97dfbab 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -12,7 +12,7 @@
 #include <linux/binfmts.h>
 #include <linux/err.h>
 #include <asm/page.h>
-#ifdef GENERIC_TIME_VSYSCALL
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
 #include <vdso/datapage.h>
 #else
 #include <asm/vdso.h>
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index bf5379135e39b628619de135632876d5338abe9d..7cd4993f4ff21a932b6ef29512f685ea4e947f9a 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -157,9 +157,10 @@ static void __init setup_initrd(void)
 void __init setup_bootmem(void)
 {
 	phys_addr_t mem_start = 0;
-	phys_addr_t start, end = 0;
+	phys_addr_t start, dram_end, end = 0;
 	phys_addr_t vmlinux_end = __pa_symbol(&_end);
 	phys_addr_t vmlinux_start = __pa_symbol(&_start);
+	phys_addr_t max_mapped_addr = __pa(~(ulong)0);
 	u64 i;
 
 	/* Find the memory region containing the kernel */
@@ -181,7 +182,18 @@ void __init setup_bootmem(void)
 	/* Reserve from the start of the kernel to the end of the kernel */
 	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
-	max_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	dram_end = memblock_end_of_DRAM();
+
+	/*
+	 * memblock allocator is not aware of the fact that last 4K bytes of
+	 * the addressable memory can not be mapped because of IS_ERR_VALUE
+	 * macro. Make sure that last 4k bytes are not usable by memblock
+	 * if end of dram is equal to maximum addressable memory.
+	 */
+	if (max_mapped_addr == (dram_end - 1))
+		memblock_set_current_limit(max_mapped_addr - 4096);
+
+	max_pfn = PFN_DOWN(dram_end);
 	max_low_pfn = max_pfn;
 	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
 	set_max_mapnr(max_low_pfn);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 12ddd1f6bf70c8e96ac92b02e0b37c8e22c51043..a8a2ffd9114aaa22c60661c7cd2d80023747cb2e 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -93,8 +93,8 @@ void __init kasan_init(void)
 								VMALLOC_END));
 
 	for_each_mem_range(i, &_start, &_end) {
-		void *start = (void *)_start;
-		void *end = (void *)_end;
+		void *start = (void *)__va(_start);
+		void *end = (void *)__va(_end);
 
 		if (start >= end)
 			break;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 4638a52d8eaeae7419a2881e0452237a7251f9e6..6375967a8244dc4e6777d8ff95de1be33ea77e06 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -315,6 +315,25 @@ static struct syscore_ops hv_syscore_ops = {
 	.resume		= hv_resume,
 };
 
+static void (* __initdata old_setup_percpu_clockev)(void);
+
+static void __init hv_stimer_setup_percpu_clockev(void)
+{
+	/*
+	 * Ignore any errors in setting up stimer clockevents
+	 * as we can run with the LAPIC timer as a fallback.
+	 */
+	(void)hv_stimer_alloc();
+
+	/*
+	 * Still register the LAPIC timer, because the direct-mode STIMER is
+	 * not supported by old versions of Hyper-V. This also allows users
+	 * to switch to LAPIC timer via /sys, if they want to.
+	 */
+	if (old_setup_percpu_clockev)
+		old_setup_percpu_clockev();
+}
+
 /*
  * This function is to be invoked early in the boot sequence after the
  * hypervisor has been detected.
@@ -393,10 +412,14 @@ void __init hyperv_init(void)
 	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 
 	/*
-	 * Ignore any errors in setting up stimer clockevents
-	 * as we can run with the LAPIC timer as a fallback.
+	 * hyperv_init() is called before LAPIC is initialized: see
+	 * apic_intr_mode_init() -> x86_platform.apic_post_init() and
+	 * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER
+	 * depends on LAPIC, so hv_stimer_alloc() should be called from
+	 * x86_init.timers.setup_percpu_clockev.
 	 */
-	(void)hv_stimer_alloc();
+	old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev;
+	x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev;
 
 	hv_apic_init();
 
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 9e87ab010c82bbbe02798d17be75ba6171d199f6..e68ea5f4ad1ce0c3bfadafec4e01dc44fc2664e5 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -164,10 +164,10 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
 	else
 		per_cpu(xen_vcpu_id, cpu) = cpu;
 	rc = xen_vcpu_setup(cpu);
-	if (rc)
+	if (rc || !xen_have_vector_callback)
 		return rc;
 
-	if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
+	if (xen_feature(XENFEAT_hvm_safe_pvclock))
 		xen_setup_timer(cpu);
 
 	rc = xen_smp_intr_init(cpu);
@@ -188,6 +188,8 @@ static int xen_cpu_dead_hvm(unsigned int cpu)
        return 0;
 }
 
+static bool no_vector_callback __initdata;
+
 static void __init xen_hvm_guest_init(void)
 {
 	if (xen_pv_domain())
@@ -207,7 +209,7 @@ static void __init xen_hvm_guest_init(void)
 
 	xen_panic_handler_init();
 
-	if (xen_feature(XENFEAT_hvm_callback_vector))
+	if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
 
 	xen_hvm_smp_init();
@@ -233,6 +235,13 @@ static __init int xen_parse_nopv(char *arg)
 }
 early_param("xen_nopv", xen_parse_nopv);
 
+static __init int xen_parse_no_vector_callback(char *arg)
+{
+	no_vector_callback = true;
+	return 0;
+}
+early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
+
 bool __init xen_hvm_need_lapic(void)
 {
 	if (xen_pv_domain())
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index f5e7db4f82abb63039ce49271ecbe87f857a1655..6ff3c887e0b99523cd69774b8de8f3009f69d92f 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -33,9 +33,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 	int cpu;
 
 	native_smp_prepare_cpus(max_cpus);
-	WARN_ON(xen_smp_intr_init(0));
 
-	xen_init_lock_cpu(0);
+	if (xen_have_vector_callback) {
+		WARN_ON(xen_smp_intr_init(0));
+		xen_init_lock_cpu(0);
+	}
 
 	for_each_possible_cpu(cpu) {
 		if (cpu == 0)
@@ -50,9 +52,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 static void xen_hvm_cpu_die(unsigned int cpu)
 {
 	if (common_cpu_die(cpu) == 0) {
-		xen_smp_intr_free(cpu);
-		xen_uninit_lock_cpu(cpu);
-		xen_teardown_timer(cpu);
+		if (xen_have_vector_callback) {
+			xen_smp_intr_free(cpu);
+			xen_uninit_lock_cpu(cpu);
+			xen_teardown_timer(cpu);
+		}
 	}
 }
 #else
@@ -64,14 +68,19 @@ static void xen_hvm_cpu_die(unsigned int cpu)
 
 void __init xen_hvm_smp_init(void)
 {
-	if (!xen_have_vector_callback)
+	smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
+	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
+	smp_ops.smp_cpus_done = xen_smp_cpus_done;
+	smp_ops.cpu_die = xen_hvm_cpu_die;
+
+	if (!xen_have_vector_callback) {
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+		nopvspin = true;
+#endif
 		return;
+	}
 
-	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 	smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
-	smp_ops.cpu_die = xen_hvm_cpu_die;
 	smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
 	smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
-	smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
-	smp_ops.smp_cpus_done = xen_smp_cpus_done;
 }
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 8892908ad58ce40c2b868c48c957cc49c49d1ad9..788a4ba1e2e747de90b600b56a5b610ba4eb935b 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -356,7 +356,8 @@ int public_key_verify_signature(const struct public_key *pkey,
 	if (ret)
 		goto error_free_key;
 
-	if (strcmp(sig->pkey_algo, "sm2") == 0 && sig->data_size) {
+	if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 &&
+	    sig->data_size) {
 		ret = cert_sig_digest_update(sig, tfm);
 		if (ret)
 			goto error_free_key;
diff --git a/crypto/xor.c b/crypto/xor.c
index eacbf4f939900fe4f280f053c5ee1ac45a75a0c2..8f899f898ec9f903734693cd6f4ba8a593bbcc14 100644
--- a/crypto/xor.c
+++ b/crypto/xor.c
@@ -107,6 +107,8 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 	preempt_enable();
 
 	// bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
+	if (!min)
+		min = 1;
 	speed = (1000 * REPS * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
 	tmpl->speed = speed;
 
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index cb229e24c56375b0a89255667436abc46fa9b0d3..e6a5d997241c43d803d9758b01328db8b6cdb0cc 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -97,7 +97,7 @@ void acpi_scan_table_handler(u32 event, void *table, void *context);
 extern struct list_head acpi_bus_id_list;
 
 struct acpi_device_bus_id {
-	char bus_id[15];
+	const char *bus_id;
 	unsigned int instance_no;
 	struct list_head node;
 };
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 80b668c80073a58417e6bd6a2ad0d1ddc0f28d31..58ff36340cd7c595521e753897da6d2dfc2a91be 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -486,6 +486,7 @@ static void acpi_device_del(struct acpi_device *device)
 				acpi_device_bus_id->instance_no--;
 			else {
 				list_del(&acpi_device_bus_id->node);
+				kfree_const(acpi_device_bus_id->bus_id);
 				kfree(acpi_device_bus_id);
 			}
 			break;
@@ -674,7 +675,14 @@ int acpi_device_add(struct acpi_device *device,
 	}
 	if (!found) {
 		acpi_device_bus_id = new_bus_id;
-		strcpy(acpi_device_bus_id->bus_id, acpi_device_hid(device));
+		acpi_device_bus_id->bus_id =
+			kstrdup_const(acpi_device_hid(device), GFP_KERNEL);
+		if (!acpi_device_bus_id->bus_id) {
+			pr_err(PREFIX "Memory allocation error for bus id\n");
+			result = -ENOMEM;
+			goto err_free_new_bus_id;
+		}
+
 		acpi_device_bus_id->instance_no = 0;
 		list_add_tail(&acpi_device_bus_id->node, &acpi_bus_id_list);
 	}
@@ -709,6 +717,11 @@ int acpi_device_add(struct acpi_device *device,
 	if (device->parent)
 		list_del(&device->node);
 	list_del(&device->wakeup_list);
+
+ err_free_new_bus_id:
+	if (!found)
+		kfree(new_bus_id);
+
 	mutex_unlock(&acpi_device_lock);
 
  err_detach:
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index bbd51703e738b972a9437476e8cb4b60b31f5c8f..e535f28a80283e36db6ca9d1a9d484078ca40c0d 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -366,6 +366,7 @@ if CRYPTO_DEV_OMAP
 config CRYPTO_DEV_OMAP_SHAM
 	tristate "Support for OMAP MD5/SHA1/SHA2 hw accelerator"
 	depends on ARCH_OMAP2PLUS
+	select CRYPTO_ENGINE
 	select CRYPTO_SHA1
 	select CRYPTO_MD5
 	select CRYPTO_SHA256
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index 3c4e343011721e336869567f5292959ec634b1a1..364fc2f3e499549cd3eb89673b1a3dffd8af7977 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -251,6 +251,9 @@ static void cma_heap_dma_buf_release(struct dma_buf *dmabuf)
 		buffer->vaddr = NULL;
 	}
 
+	/* free page list */
+	kfree(buffer->pages);
+	/* release memory */
 	cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount);
 	kfree(buffer);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 306077884a6794e5709efcefea25dea0554b3a33..6107ac91db250731468f7e86b56ffbe63daca372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -112,6 +112,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 union igp_info {
 	struct atom_integrated_system_info_v1_11 v11;
 	struct atom_integrated_system_info_v1_12 v12;
+	struct atom_integrated_system_info_v2_1 v21;
 };
 
 union umc_info {
@@ -209,24 +210,42 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
 		if (adev->flags & AMD_IS_APU) {
 			igp_info = (union igp_info *)
 				(mode_info->atom_context->bios + data_offset);
-			switch (crev) {
-			case 11:
-				mem_channel_number = igp_info->v11.umachannelnumber;
-				/* channel width is 64 */
-				if (vram_width)
-					*vram_width = mem_channel_number * 64;
-				mem_type = igp_info->v11.memorytype;
-				if (vram_type)
-					*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+			switch (frev) {
+			case 1:
+				switch (crev) {
+				case 11:
+				case 12:
+					mem_channel_number = igp_info->v11.umachannelnumber;
+					if (!mem_channel_number)
+						mem_channel_number = 1;
+					/* channel width is 64 */
+					if (vram_width)
+						*vram_width = mem_channel_number * 64;
+					mem_type = igp_info->v11.memorytype;
+					if (vram_type)
+						*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+					break;
+				default:
+					return -EINVAL;
+				}
 				break;
-			case 12:
-				mem_channel_number = igp_info->v12.umachannelnumber;
-				/* channel width is 64 */
-				if (vram_width)
-					*vram_width = mem_channel_number * 64;
-				mem_type = igp_info->v12.memorytype;
-				if (vram_type)
-					*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+			case 2:
+				switch (crev) {
+				case 1:
+				case 2:
+					mem_channel_number = igp_info->v21.umachannelnumber;
+					if (!mem_channel_number)
+						mem_channel_number = 1;
+					/* channel width is 64 */
+					if (vram_width)
+						*vram_width = mem_channel_number * 64;
+					mem_type = igp_info->v21.memorytype;
+					if (vram_type)
+						*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+					break;
+				default:
+					return -EINVAL;
+				}
 				break;
 			default:
 				return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b69c34074d8d7a02f9fb23ee7fef394561f37978..087afab67e2216c993c6fc0f7b8f1ff9294b1e03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3034,7 +3034,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 #endif
 	default:
 		if (amdgpu_dc > 0)
-			DRM_INFO("Display Core has been requested via kernel parameter "
+			DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
 					 "but isn't supported by ASIC, ignoring\n");
 		return false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 72efd579ec5ee198ef2c100f38faefa5f60641e0..7169fb5e3d9c47f0c8fd575b11544c31cd83cdb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1085,6 +1085,8 @@ static const struct pci_device_id pciidlist[] = {
 
 	/* Renoir */
 	{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+	{0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+	{0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
 
 	/* Navi12 */
 	{0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ba108678452551f0917d01ff67cf6c186822a171..619d34c041ee025f89a4bf6f892e04df78c64a56 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -99,6 +99,10 @@
 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid			0x1580
 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX	0
 
+#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh                0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX       1
+#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh                0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX       1
 #define mmSPI_CONFIG_CNTL_1_Vangogh		 0x2441
 #define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX	 1
 #define mmVGT_TF_MEMORY_BASE_HI_Vangogh          0x2261
@@ -160,6 +164,9 @@
 #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid          0x15db
 #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX	0
 
+#define mmGC_THROTTLE_CTRL_Sienna_Cichlid              0x2030
+#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX     0
+
 MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
 MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -3324,6 +3331,7 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
 static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
 static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
+static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -7192,6 +7200,9 @@ static int gfx_v10_0_hw_init(void *handle)
 	if (adev->asic_type == CHIP_SIENNA_CICHLID)
 		gfx_v10_3_program_pbb_mode(adev);
 
+	if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+		gfx_v10_3_set_power_brake_sequence(adev);
+
 	return r;
 }
 
@@ -7377,8 +7388,16 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 
 	amdgpu_gfx_off_ctrl(adev, false);
 	mutex_lock(&adev->gfx.gpu_clock_mutex);
-	clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
-		((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
+	switch (adev->asic_type) {
+	case CHIP_VANGOGH:
+		clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) |
+			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
+		break;
+	default:
+		clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
+			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
+		break;
+	}
 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
 	amdgpu_gfx_off_ctrl(adev, true);
 	return clock;
@@ -9169,6 +9188,31 @@ static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev)
 	}
 }
 
+static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+		     (0x1 << GRBM_GFX_INDEX__SA_BROADCAST_WRITES__SHIFT) |
+		     (0x1 << GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT) |
+		     (0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+
+	WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL);
+	WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA,
+		     (0x1 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_STEP_INTERVAL__SHIFT) |
+		     (0x12 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_BEGIN_STEP__SHIFT) |
+		     (0x13 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_END_STEP__SHIFT) |
+		     (0xf << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_THROTTLE_PATTERN_BIT_NUMS__SHIFT));
+
+	WREG32_SOC15(GC, 0, mmGC_THROTTLE_CTRL_Sienna_Cichlid,
+		     (0x1 << GC_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT) |
+		     (0x1 << GC_THROTTLE_CTRL__PATTERN_MODE__SHIFT) |
+		     (0x5 << GC_THROTTLE_CTRL__RELEASE_STEP_INTERVAL__SHIFT));
+
+	WREG32_SOC15(GC, 0, mmDIDT_IND_INDEX, ixDIDT_SQ_THROTTLE_CTRL);
+
+	WREG32_SOC15(GC, 0, mmDIDT_IND_DATA,
+		     (0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT));
+}
+
 const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
 {
 	.type = AMD_IP_BLOCK_TYPE_GFX,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index d65a5339d354ac268760c7b88e74fbdb174ea0dc..3ba7bdfde65d81f8f0c91c88b93f1ce15d33cd47 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -47,7 +47,7 @@ enum psp_gfx_crtl_cmd_id
     GFX_CTRL_CMD_ID_DISABLE_INT     = 0x00060000,   /* disable PSP-to-Gfx interrupt */
     GFX_CTRL_CMD_ID_MODE1_RST       = 0x00070000,   /* trigger the Mode 1 reset */
     GFX_CTRL_CMD_ID_GBR_IH_SET      = 0x00080000,   /* set Gbr IH_RB_CNTL registers */
-    GFX_CTRL_CMD_ID_CONSUME_CMD     = 0x000A0000,   /* send interrupt to psp for updating write pointer of vf */
+    GFX_CTRL_CMD_ID_CONSUME_CMD     = 0x00090000,   /* send interrupt to psp for updating write pointer of vf */
     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */
 
     GFX_CTRL_CMD_ID_MAX             = 0x000F0000,   /* max command ID */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8a23636ecc27ff8577997ce207733d7f240e7ecf..0b3516c4eefb33881c235ba44d8338c0cef2515e 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1239,7 +1239,8 @@ static int soc15_common_early_init(void *handle)
 		break;
 	case CHIP_RENOIR:
 		adev->asic_funcs = &soc15_asic_funcs;
-		if (adev->pdev->device == 0x1636)
+		if ((adev->pdev->device == 0x1636) ||
+		    (adev->pdev->device == 0x164c))
 			adev->apu_flags |= AMD_APU_IS_RENOIR;
 		else
 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 8cac497c2c459aa86421ddbce9eab12d6544ce03..a5640a6138cf2cb019cb00d12a08d4f6faee2ad6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1040,11 +1040,14 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 				(struct crat_subtype_iolink *)sub_type_hdr);
 		if (ret < 0)
 			return ret;
-		crat_table->length += (sub_type_hdr->length * entries);
-		crat_table->total_entries += entries;
 
-		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-				sub_type_hdr->length * entries);
+		if (entries) {
+			crat_table->length += (sub_type_hdr->length * entries);
+			crat_table->total_entries += entries;
+
+			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+					sub_type_hdr->length * entries);
+		}
 #else
 		pr_info("IO link not available for non x86 platforms\n");
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 146486071d01d88812919e16c1de2843aa1b809a..c6da89df055de87c89ba0ba05ed8739993a1c97b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -939,41 +939,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 }
 #endif
 
-#ifdef CONFIG_DEBUG_FS
-static int create_crtc_crc_properties(struct amdgpu_display_manager *dm)
-{
-	dm->crc_win_x_start_property =
-		drm_property_create_range(adev_to_drm(dm->adev),
-					  DRM_MODE_PROP_ATOMIC,
-					  "AMD_CRC_WIN_X_START", 0, U16_MAX);
-	if (!dm->crc_win_x_start_property)
-		return -ENOMEM;
-
-	dm->crc_win_y_start_property =
-		drm_property_create_range(adev_to_drm(dm->adev),
-					  DRM_MODE_PROP_ATOMIC,
-					  "AMD_CRC_WIN_Y_START", 0, U16_MAX);
-	if (!dm->crc_win_y_start_property)
-		return -ENOMEM;
-
-	dm->crc_win_x_end_property =
-		drm_property_create_range(adev_to_drm(dm->adev),
-					  DRM_MODE_PROP_ATOMIC,
-					  "AMD_CRC_WIN_X_END", 0, U16_MAX);
-	if (!dm->crc_win_x_end_property)
-		return -ENOMEM;
-
-	dm->crc_win_y_end_property =
-		drm_property_create_range(adev_to_drm(dm->adev),
-					  DRM_MODE_PROP_ATOMIC,
-					  "AMD_CRC_WIN_Y_END", 0, U16_MAX);
-	if (!dm->crc_win_y_end_property)
-		return -ENOMEM;
-
-	return 0;
-}
-#endif
-
 static int amdgpu_dm_init(struct amdgpu_device *adev)
 {
 	struct dc_init_data init_data;
@@ -1120,10 +1085,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 
 		dc_init_callbacks(adev->dm.dc, &init_params);
 	}
-#endif
-#ifdef CONFIG_DEBUG_FS
-	if (create_crtc_crc_properties(&adev->dm))
-		DRM_ERROR("amdgpu: failed to create crc property.\n");
 #endif
 	if (amdgpu_dm_initialize_drm_device(adev)) {
 		DRM_ERROR(
@@ -5333,64 +5294,12 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
 	state->crc_src = cur->crc_src;
 	state->cm_has_degamma = cur->cm_has_degamma;
 	state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
-#ifdef CONFIG_DEBUG_FS
-	state->crc_window = cur->crc_window;
-#endif
+
 	/* TODO Duplicate dc_stream after objects are stream object is flattened */
 
 	return &state->base;
 }
 
-#ifdef CONFIG_DEBUG_FS
-static int amdgpu_dm_crtc_atomic_set_property(struct drm_crtc *crtc,
-					    struct drm_crtc_state *crtc_state,
-					    struct drm_property *property,
-					    uint64_t val)
-{
-	struct drm_device *dev = crtc->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct dm_crtc_state *dm_new_state =
-		to_dm_crtc_state(crtc_state);
-
-	if (property == adev->dm.crc_win_x_start_property)
-		dm_new_state->crc_window.x_start = val;
-	else if (property == adev->dm.crc_win_y_start_property)
-		dm_new_state->crc_window.y_start = val;
-	else if (property == adev->dm.crc_win_x_end_property)
-		dm_new_state->crc_window.x_end = val;
-	else if (property == adev->dm.crc_win_y_end_property)
-		dm_new_state->crc_window.y_end = val;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int amdgpu_dm_crtc_atomic_get_property(struct drm_crtc *crtc,
-					    const struct drm_crtc_state *state,
-					    struct drm_property *property,
-					    uint64_t *val)
-{
-	struct drm_device *dev = crtc->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct dm_crtc_state *dm_state =
-		to_dm_crtc_state(state);
-
-	if (property == adev->dm.crc_win_x_start_property)
-		*val = dm_state->crc_window.x_start;
-	else if (property == adev->dm.crc_win_y_start_property)
-		*val = dm_state->crc_window.y_start;
-	else if (property == adev->dm.crc_win_x_end_property)
-		*val = dm_state->crc_window.x_end;
-	else if (property == adev->dm.crc_win_y_end_property)
-		*val = dm_state->crc_window.y_end;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-#endif
-
 static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
 {
 	enum dc_irq_source irq_source;
@@ -5457,10 +5366,6 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 	.enable_vblank = dm_enable_vblank,
 	.disable_vblank = dm_disable_vblank,
 	.get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
-#ifdef CONFIG_DEBUG_FS
-	.atomic_set_property = amdgpu_dm_crtc_atomic_set_property,
-	.atomic_get_property = amdgpu_dm_crtc_atomic_get_property,
-#endif
 };
 
 static enum drm_connector_status
@@ -6662,25 +6567,6 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
 	return 0;
 }
 
-#ifdef CONFIG_DEBUG_FS
-static void attach_crtc_crc_properties(struct amdgpu_display_manager *dm,
-				struct amdgpu_crtc *acrtc)
-{
-	drm_object_attach_property(&acrtc->base.base,
-				   dm->crc_win_x_start_property,
-				   0);
-	drm_object_attach_property(&acrtc->base.base,
-				   dm->crc_win_y_start_property,
-				   0);
-	drm_object_attach_property(&acrtc->base.base,
-				   dm->crc_win_x_end_property,
-				   0);
-	drm_object_attach_property(&acrtc->base.base,
-				   dm->crc_win_y_end_property,
-				   0);
-}
-#endif
-
 static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 			       struct drm_plane *plane,
 			       uint32_t crtc_index)
@@ -6728,9 +6614,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 	drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES,
 				   true, MAX_COLOR_LUT_ENTRIES);
 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
-#ifdef CONFIG_DEBUG_FS
-	attach_crtc_crc_properties(dm, acrtc);
-#endif
+
 	return 0;
 
 fail:
@@ -8367,7 +8251,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	 */
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
-		bool configure_crc = false;
 
 		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 
@@ -8377,27 +8260,20 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			dc_stream_retain(dm_new_crtc_state->stream);
 			acrtc->dm_irq_params.stream = dm_new_crtc_state->stream;
 			manage_dm_interrupts(adev, acrtc, true);
-		}
-		if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active &&
-			amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) {
+
+#ifdef CONFIG_DEBUG_FS
 			/**
 			 * Frontend may have changed so reapply the CRC capture
 			 * settings for the stream.
 			 */
 			dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
-			dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 
-			if (amdgpu_dm_crc_window_is_default(dm_new_crtc_state)) {
-				if (!old_crtc_state->active || drm_atomic_crtc_needs_modeset(new_crtc_state))
-					configure_crc = true;
-			} else {
-				if (amdgpu_dm_crc_window_changed(dm_new_crtc_state, dm_old_crtc_state))
-					configure_crc = true;
-			}
-
-			if (configure_crc)
+			if (amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) {
 				amdgpu_dm_crtc_configure_crc_source(
-					crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src);
+					crtc, dm_new_crtc_state,
+					dm_new_crtc_state->crc_src);
+			}
+#endif
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 2ee6edb3df931b5eb100722377eff805f9c19fd7..1182dafcef0228fd8818cfd82e597066a7abc3c0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -336,32 +336,6 @@ struct amdgpu_display_manager {
 	 */
 	const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box;
 
-#ifdef CONFIG_DEBUG_FS
-	/**
-	 * @crc_win_x_start_property:
-	 *
-	 * X start of the crc calculation window
-	 */
-	struct drm_property *crc_win_x_start_property;
-	/**
-	 * @crc_win_y_start_property:
-	 *
-	 * Y start of the crc calculation window
-	 */
-	struct drm_property *crc_win_y_start_property;
-	/**
-	 * @crc_win_x_end_property:
-	 *
-	 * X end of the crc calculation window
-	 */
-	struct drm_property *crc_win_x_end_property;
-	/**
-	 * @crc_win_y_end_property:
-	 *
-	 * Y end of the crc calculation window
-	 */
-	struct drm_property *crc_win_y_end_property;
-#endif
 	/**
 	 * @mst_encoders:
 	 *
@@ -448,15 +422,6 @@ struct dm_plane_state {
 	struct dc_plane_state *dc_state;
 };
 
-#ifdef CONFIG_DEBUG_FS
-struct crc_rec {
-	uint16_t x_start;
-	uint16_t y_start;
-	uint16_t x_end;
-	uint16_t y_end;
-	};
-#endif
-
 struct dm_crtc_state {
 	struct drm_crtc_state base;
 	struct dc_stream_state *stream;
@@ -479,9 +444,6 @@ struct dm_crtc_state {
 	struct dc_info_packet vrr_infopacket;
 
 	int abm_level;
-#ifdef CONFIG_DEBUG_FS
-	struct crc_rec crc_window;
-#endif
 };
 
 #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index 7b886a779a8cabe94b9b48272869630c2fd9f42e..66cb8730586b1c0b520024bbb501b6b83e536c99 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -81,41 +81,6 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc,
 	return pipe_crc_sources;
 }
 
-static void amdgpu_dm_set_crc_window_default(struct dm_crtc_state *dm_crtc_state)
-{
-	dm_crtc_state->crc_window.x_start = 0;
-	dm_crtc_state->crc_window.y_start = 0;
-	dm_crtc_state->crc_window.x_end = 0;
-	dm_crtc_state->crc_window.y_end = 0;
-}
-
-bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state)
-{
-	bool ret = true;
-
-	if ((dm_crtc_state->crc_window.x_start != 0) ||
-		(dm_crtc_state->crc_window.y_start != 0) ||
-		(dm_crtc_state->crc_window.x_end != 0) ||
-		(dm_crtc_state->crc_window.y_end != 0))
-		ret = false;
-
-	return ret;
-}
-
-bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state,
-					struct dm_crtc_state *dm_old_crtc_state)
-{
-	bool ret = false;
-
-	if ((dm_new_crtc_state->crc_window.x_start != dm_old_crtc_state->crc_window.x_start) ||
-		(dm_new_crtc_state->crc_window.y_start != dm_old_crtc_state->crc_window.y_start) ||
-		(dm_new_crtc_state->crc_window.x_end != dm_old_crtc_state->crc_window.x_end) ||
-		(dm_new_crtc_state->crc_window.y_end != dm_old_crtc_state->crc_window.y_end))
-		ret = true;
-
-	return ret;
-}
-
 int
 amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name,
 				 size_t *values_cnt)
@@ -140,7 +105,6 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
 	struct dc_stream_state *stream_state = dm_crtc_state->stream;
 	bool enable = amdgpu_dm_is_valid_crc_source(source);
 	int ret = 0;
-	struct crc_params *crc_window = NULL, tmp_window;
 
 	/* Configuration will be deferred to stream enable. */
 	if (!stream_state)
@@ -150,24 +114,8 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
 
 	/* Enable CRTC CRC generation if necessary. */
 	if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) {
-		if (!enable)
-			amdgpu_dm_set_crc_window_default(dm_crtc_state);
-
-		if (!amdgpu_dm_crc_window_is_default(dm_crtc_state)) {
-			crc_window = &tmp_window;
-
-			tmp_window.windowa_x_start = dm_crtc_state->crc_window.x_start;
-			tmp_window.windowa_y_start = dm_crtc_state->crc_window.y_start;
-			tmp_window.windowa_x_end = dm_crtc_state->crc_window.x_end;
-			tmp_window.windowa_y_end = dm_crtc_state->crc_window.y_end;
-			tmp_window.windowb_x_start = dm_crtc_state->crc_window.x_start;
-			tmp_window.windowb_y_start = dm_crtc_state->crc_window.y_start;
-			tmp_window.windowb_x_end = dm_crtc_state->crc_window.x_end;
-			tmp_window.windowb_y_end = dm_crtc_state->crc_window.y_end;
-		}
-
 		if (!dc_stream_configure_crc(stream_state->ctx->dc,
-					     stream_state, crc_window, enable, enable)) {
+					     stream_state, NULL, enable, enable)) {
 			ret = -EINVAL;
 			goto unlock;
 		}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
index eba2f1d35d07548e0b8f50e71939bd435450eb0b..f7d731797d3fc339389ace86c5b27c56b6152b51 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
@@ -46,13 +46,10 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source
 }
 
 /* amdgpu_dm_crc.c */
-bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state);
-bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state,
-					struct dm_crtc_state *dm_old_crtc_state);
+#ifdef CONFIG_DEBUG_FS
 int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
 					struct dm_crtc_state *dm_crtc_state,
 					enum amdgpu_dm_pipe_crc_source source);
-#ifdef CONFIG_DEBUG_FS
 int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name);
 int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc,
 				     const char *src_name,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 2fc12239b22cb72f4c81fbf83b83969b4e6efed0..1bd1a0935290673535333991f7fa7f7cb37798d6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -3992,7 +3992,7 @@ bool dc_link_dp_set_test_pattern(
 	unsigned int cust_pattern_size)
 {
 	struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
-	struct pipe_ctx *pipe_ctx = &pipes[0];
+	struct pipe_ctx *pipe_ctx = NULL;
 	unsigned int lane;
 	unsigned int i;
 	unsigned char link_qual_pattern[LANE_COUNT_DP_MAX] = {0};
@@ -4002,12 +4002,18 @@ bool dc_link_dp_set_test_pattern(
 	memset(&training_pattern, 0, sizeof(training_pattern));
 
 	for (i = 0; i < MAX_PIPES; i++) {
+		if (pipes[i].stream == NULL)
+			continue;
+
 		if (pipes[i].stream->link == link && !pipes[i].top_pipe && !pipes[i].prev_odm_pipe) {
 			pipe_ctx = &pipes[i];
 			break;
 		}
 	}
 
+	if (pipe_ctx == NULL)
+		return false;
+
 	/* Reset CRTC Test Pattern if it is currently running and request is VideoMode */
 	if (link->test_pattern_enabled && test_pattern ==
 			DP_TEST_PATTERN_VIDEO_MODE) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 100ce0e28fd5ab40c85caccf96e86a4f95bc00bf..b096011acb490fff2016c154c2ccd20f723b0479 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -470,7 +470,7 @@ void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock)
 unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
-	uint32_t val = 0;
+	uint32_t val = 0xf;
 
 	if (opp_id < MAX_OPP && REG(MUX[opp_id]))
 		REG_GET(MUX[opp_id], MPC_OUT_MUX, &val);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 36745193c391c0d949a8b16eeaf81cf187721fce..90e912fef2b36c069142e570bea412bb151486f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -608,8 +608,8 @@ static const struct dc_debug_options debug_defaults_drv = {
 		.disable_pplib_clock_request = false,
 		.disable_pplib_wm_range = false,
 		.pplib_wm_report_mode = WM_REPORT_DEFAULT,
-		.pipe_split_policy = MPC_SPLIT_DYNAMIC,
-		.force_single_disp_pipe_split = true,
+		.pipe_split_policy = MPC_SPLIT_AVOID,
+		.force_single_disp_pipe_split = false,
 		.disable_dcc = DCC_ENABLE,
 		.voltage_align_fclk = true,
 		.disable_stereo_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 4825c5c1c6ed686217ec3f9850a75fd080091bdc..35f5bf08ae96e2570d22282da46f1f499236d4a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -1731,6 +1731,7 @@ static struct resource_funcs dcn301_res_pool_funcs = {
 	.populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
 	.acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
 	.add_stream_to_ctx = dcn30_add_stream_to_ctx,
+	.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
 	.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
 	.populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
 	.set_mcif_arb_params = dcn30_set_mcif_arb_params,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 860e72a51534cc014d9baccf227aeccc1979a4a9..80170f9721ce949330d72408b81c48575af13101 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -2635,14 +2635,15 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
 	}
 
 	if (mode_lib->vba.DRAMClockChangeSupportsVActive &&
-			mode_lib->vba.MinActiveDRAMClockChangeMargin > 60 &&
-			mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
+			mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) {
 		mode_lib->vba.DRAMClockChangeWatermark += 25;
 
 		for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
-			if (mode_lib->vba.DRAMClockChangeWatermark >
-			dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark))
-				mode_lib->vba.MinTTUVBlank[k] += 25;
+			if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
+				if (mode_lib->vba.DRAMClockChangeWatermark >
+				dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark))
+					mode_lib->vba.MinTTUVBlank[k] += 25;
+			}
 		}
 
 		mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index e6231947f98723a7720504ec61ad00adf4ec08b0..a0cb746bcb0a9a1344e0cd23fa3dcf7a228a9f3b 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -1163,7 +1163,14 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	if (ret)
 		goto out;
 
-	if (old_fb->format != fb->format) {
+	/*
+	 * Only check the FOURCC format code, excluding modifiers. This is
+	 * enough for all legacy drivers. Atomic drivers have their own
+	 * checks in their ->atomic_check implementation, which will
+	 * return -EINVAL if any hw or driver constraint is violated due
+	 * to modifier changes.
+	 */
+	if (old_fb->format->format != fb->format->format) {
 		DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n");
 		ret = -EINVAL;
 		goto out;
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e5574e506a5ccf03684f47fc0f59d9a62f4ae1d8..6d9e81ea67f4b742d52afaf420a8ccfc9a4ef329 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -38,6 +38,7 @@ i915-y += i915_drv.o \
 	  i915_config.o \
 	  i915_irq.o \
 	  i915_getparam.o \
+	  i915_mitigations.o \
 	  i915_params.o \
 	  i915_pci.o \
 	  i915_scatterlist.o \
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index a9439b4156037169b17361c2ea7a446f7d45b573..b3533a32f8ba2e4d015532123111c7ee989381a8 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1616,10 +1616,6 @@ static void gen11_dsi_get_power_domains(struct intel_encoder *encoder,
 
 	get_dsi_io_power_domains(i915,
 				 enc_to_intel_dsi(encoder));
-
-	if (crtc_state->dsc.compression_enable)
-		intel_display_power_get(i915,
-					intel_dsc_power_domain(crtc_state));
 }
 
 static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c
index 9f23bac0d7924d3857e1e0e8fdc202f1341dc5b5..d64fce1a17cbc5668f756853a769244554d9504d 100644
--- a/drivers/gpu/drm/i915/display/intel_panel.c
+++ b/drivers/gpu/drm/i915/display/intel_panel.c
@@ -1650,16 +1650,13 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus
 		val = pch_get_backlight(connector);
 	else
 		val = lpt_get_backlight(connector);
-	val = intel_panel_compute_brightness(connector, val);
-	panel->backlight.level = clamp(val, panel->backlight.min,
-				       panel->backlight.max);
 
 	if (cpu_mode) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "CPU backlight register was enabled, switching to PCH override\n");
 
 		/* Write converted CPU PWM value to PCH override register */
-		lpt_set_backlight(connector->base.state, panel->backlight.level);
+		lpt_set_backlight(connector->base.state, val);
 		intel_de_write(dev_priv, BLC_PWM_PCH_CTL1,
 			       pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE);
 
@@ -1667,6 +1664,10 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus
 			       cpu_ctl2 & ~BLM_PWM_ENABLE);
 	}
 
+	val = intel_panel_compute_brightness(connector, val);
+	panel->backlight.level = clamp(val, panel->backlight.min,
+				       panel->backlight.max);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index d52f9c1779081e02afa7d843926938f04fc25912..f94025ec603a6d132b0024b05afad604834fad02 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -812,10 +812,20 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state,
 		intel_dsi_prepare(encoder, pipe_config);
 
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON);
-	intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay);
 
-	/* Deassert reset */
-	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET);
+	/*
+	 * Give the panel time to power-on and then deassert its reset.
+	 * Depending on the VBT MIPI sequences version the deassert-seq
+	 * may contain the necessary delay, intel_dsi_msleep() will skip
+	 * the delay in that case. If there is no deassert-seq, then an
+	 * unconditional msleep is used to give the panel time to power-on.
+	 */
+	if (dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) {
+		intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay);
+		intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET);
+	} else {
+		msleep(intel_dsi->panel_on_delay);
+	}
 
 	if (IS_GEMINILAKE(dev_priv)) {
 		glk_cold_boot = glk_dsi_enable_io(encoder);
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index d93d85cd30270ba371ac877632b1d2261dcdc8e5..94465374ca2fe5505c67804bf697d5b88f9c102f 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -7,8 +7,6 @@
 #include "i915_drv.h"
 #include "intel_gpu_commands.h"
 
-#define MAX_URB_ENTRIES 64
-#define STATE_SIZE (4 * 1024)
 #define GT3_INLINE_DATA_DELAYS 0x1E00
 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))
 
@@ -34,38 +32,59 @@ struct batch_chunk {
 };
 
 struct batch_vals {
-	u32 max_primitives;
-	u32 max_urb_entries;
-	u32 cmd_size;
-	u32 state_size;
+	u32 max_threads;
 	u32 state_start;
-	u32 batch_size;
+	u32 surface_start;
 	u32 surface_height;
 	u32 surface_width;
-	u32 scratch_size;
-	u32 max_size;
+	u32 size;
 };
 
+static inline int num_primitives(const struct batch_vals *bv)
+{
+	/*
+	 * We need to saturate the GPU with work in order to dispatch
+	 * a shader on every HW thread, and clear the thread-local registers.
+	 * In short, we have to dispatch work faster than the shaders can
+	 * run in order to fill the EU and occupy each HW thread.
+	 */
+	return bv->max_threads;
+}
+
 static void
 batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
 {
 	if (IS_HASWELL(i915)) {
-		bv->max_primitives = 280;
-		bv->max_urb_entries = MAX_URB_ENTRIES;
+		switch (INTEL_INFO(i915)->gt) {
+		default:
+		case 1:
+			bv->max_threads = 70;
+			break;
+		case 2:
+			bv->max_threads = 140;
+			break;
+		case 3:
+			bv->max_threads = 280;
+			break;
+		}
 		bv->surface_height = 16 * 16;
 		bv->surface_width = 32 * 2 * 16;
 	} else {
-		bv->max_primitives = 128;
-		bv->max_urb_entries = MAX_URB_ENTRIES / 2;
+		switch (INTEL_INFO(i915)->gt) {
+		default:
+		case 1: /* including vlv */
+			bv->max_threads = 36;
+			break;
+		case 2:
+			bv->max_threads = 128;
+			break;
+		}
 		bv->surface_height = 16 * 8;
 		bv->surface_width = 32 * 16;
 	}
-	bv->cmd_size = bv->max_primitives * 4096;
-	bv->state_size = STATE_SIZE;
-	bv->state_start = bv->cmd_size;
-	bv->batch_size = bv->cmd_size + bv->state_size;
-	bv->scratch_size = bv->surface_height * bv->surface_width;
-	bv->max_size = bv->batch_size + bv->scratch_size;
+	bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K);
+	bv->surface_start = bv->state_start + SZ_4K;
+	bv->size = bv->surface_start + bv->surface_height * bv->surface_width;
 }
 
 static void batch_init(struct batch_chunk *bc,
@@ -155,7 +174,8 @@ static u32
 gen7_fill_binding_table(struct batch_chunk *state,
 			const struct batch_vals *bv)
 {
-	u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv);
+	u32 surface_start =
+		gen7_fill_surface_state(state, bv->surface_start, bv);
 	u32 *cs = batch_alloc_items(state, 32, 8);
 	u32 offset = batch_offset(state, cs);
 
@@ -214,9 +234,9 @@ static void
 gen7_emit_state_base_address(struct batch_chunk *batch,
 			     u32 surface_state_base)
 {
-	u32 *cs = batch_alloc_items(batch, 0, 12);
+	u32 *cs = batch_alloc_items(batch, 0, 10);
 
-	*cs++ = STATE_BASE_ADDRESS | (12 - 2);
+	*cs++ = STATE_BASE_ADDRESS | (10 - 2);
 	/* general */
 	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
 	/* surface */
@@ -233,8 +253,6 @@ gen7_emit_state_base_address(struct batch_chunk *batch,
 	*cs++ = BASE_ADDRESS_MODIFY;
 	*cs++ = 0;
 	*cs++ = BASE_ADDRESS_MODIFY;
-	*cs++ = 0;
-	*cs++ = 0;
 	batch_advance(batch, cs);
 }
 
@@ -244,8 +262,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch,
 		    u32 urb_size, u32 curbe_size,
 		    u32 mode)
 {
-	u32 urb_entries = bv->max_urb_entries;
-	u32 threads = bv->max_primitives - 1;
+	u32 threads = bv->max_threads - 1;
 	u32 *cs = batch_alloc_items(batch, 32, 8);
 
 	*cs++ = MEDIA_VFE_STATE | (8 - 2);
@@ -254,7 +271,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch,
 	*cs++ = 0;
 
 	/* number of threads & urb entries for GPGPU vs Media Mode */
-	*cs++ = threads << 16 | urb_entries << 8 | mode << 2;
+	*cs++ = threads << 16 | 1 << 8 | mode << 2;
 
 	*cs++ = 0;
 
@@ -293,17 +310,12 @@ gen7_emit_media_object(struct batch_chunk *batch,
 {
 	unsigned int x_offset = (media_object_index % 16) * 64;
 	unsigned int y_offset = (media_object_index / 16) * 16;
-	unsigned int inline_data_size;
-	unsigned int media_batch_size;
-	unsigned int i;
+	unsigned int pkt = 6 + 3;
 	u32 *cs;
 
-	inline_data_size = 112 * 8;
-	media_batch_size = inline_data_size + 6;
-
-	cs = batch_alloc_items(batch, 8, media_batch_size);
+	cs = batch_alloc_items(batch, 8, pkt);
 
-	*cs++ = MEDIA_OBJECT | (media_batch_size - 2);
+	*cs++ = MEDIA_OBJECT | (pkt - 2);
 
 	/* interface descriptor offset */
 	*cs++ = 0;
@@ -317,25 +329,44 @@ gen7_emit_media_object(struct batch_chunk *batch,
 	*cs++ = 0;
 
 	/* inline */
-	*cs++ = (y_offset << 16) | (x_offset);
+	*cs++ = y_offset << 16 | x_offset;
 	*cs++ = 0;
 	*cs++ = GT3_INLINE_DATA_DELAYS;
-	for (i = 3; i < inline_data_size; i++)
-		*cs++ = 0;
 
 	batch_advance(batch, cs);
 }
 
 static void gen7_emit_pipeline_flush(struct batch_chunk *batch)
 {
-	u32 *cs = batch_alloc_items(batch, 0, 5);
+	u32 *cs = batch_alloc_items(batch, 0, 4);
 
-	*cs++ = GFX_OP_PIPE_CONTROL(5);
-	*cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE |
-		PIPE_CONTROL_GLOBAL_GTT_IVB;
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		PIPE_CONTROL_DC_FLUSH_ENABLE |
+		PIPE_CONTROL_CS_STALL;
 	*cs++ = 0;
 	*cs++ = 0;
+
+	batch_advance(batch, cs);
+}
+
+static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch)
+{
+	u32 *cs = batch_alloc_items(batch, 0, 8);
+
+	/* ivb: Stall before STATE_CACHE_INVALIDATE */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD |
+		PIPE_CONTROL_CS_STALL;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 	*cs++ = 0;
+	*cs++ = 0;
+
 	batch_advance(batch, cs);
 }
 
@@ -344,34 +375,34 @@ static void emit_batch(struct i915_vma * const vma,
 		       const struct batch_vals *bv)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	unsigned int desc_count = 64;
-	const u32 urb_size = 112;
+	const unsigned int desc_count = 1;
+	const unsigned int urb_size = 1;
 	struct batch_chunk cmds, state;
-	u32 interface_descriptor;
+	u32 descriptors;
 	unsigned int i;
 
-	batch_init(&cmds, vma, start, 0, bv->cmd_size);
-	batch_init(&state, vma, start, bv->state_start, bv->state_size);
+	batch_init(&cmds, vma, start, 0, bv->state_start);
+	batch_init(&state, vma, start, bv->state_start, SZ_4K);
 
-	interface_descriptor =
-		gen7_fill_interface_descriptor(&state, bv,
-					       IS_HASWELL(i915) ?
-					       &cb_kernel_hsw :
-					       &cb_kernel_ivb,
-					       desc_count);
-	gen7_emit_pipeline_flush(&cmds);
+	descriptors = gen7_fill_interface_descriptor(&state, bv,
+						     IS_HASWELL(i915) ?
+						     &cb_kernel_hsw :
+						     &cb_kernel_ivb,
+						     desc_count);
+
+	gen7_emit_pipeline_invalidate(&cmds);
 	batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
 	batch_add(&cmds, MI_NOOP);
-	gen7_emit_state_base_address(&cmds, interface_descriptor);
+	gen7_emit_pipeline_invalidate(&cmds);
+
 	gen7_emit_pipeline_flush(&cmds);
+	gen7_emit_state_base_address(&cmds, descriptors);
+	gen7_emit_pipeline_invalidate(&cmds);
 
 	gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0);
+	gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count);
 
-	gen7_emit_interface_descriptor_load(&cmds,
-					    interface_descriptor,
-					    desc_count);
-
-	for (i = 0; i < bv->max_primitives; i++)
+	for (i = 0; i < num_primitives(bv); i++)
 		gen7_emit_media_object(&cmds, i);
 
 	batch_add(&cmds, MI_BATCH_BUFFER_END);
@@ -385,15 +416,15 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
 
 	batch_get_defaults(engine->i915, &bv);
 	if (!vma)
-		return bv.max_size;
+		return bv.size;
 
-	GEM_BUG_ON(vma->obj->base.size < bv.max_size);
+	GEM_BUG_ON(vma->obj->base.size < bv.size);
 
 	batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
 
-	emit_batch(vma, memset(batch, 0, bv.max_size), &bv);
+	emit_batch(vma, memset(batch, 0, bv.size), &bv);
 
 	i915_gem_object_flush_map(vma->obj);
 	__i915_gem_object_release_map(vma->obj);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index a41b43f445b8ae3fee0f545a7d3a29e5ef10a3c9..ecf3a6118a6d7989a36488672faca28edf37fdc6 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -32,6 +32,7 @@
 #include "gen6_ppgtt.h"
 #include "gen7_renderclear.h"
 #include "i915_drv.h"
+#include "i915_mitigations.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_gt.h"
@@ -886,7 +887,8 @@ static int switch_context(struct i915_request *rq)
 	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));
 
 	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
-		if (engine->wa_ctx.vma->private != ce) {
+		if (engine->wa_ctx.vma->private != ce &&
+		    i915_mitigate_clear_residuals()) {
 			ret = clear_residuals(rq);
 			if (ret)
 				return ret;
@@ -1290,7 +1292,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 
 	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
 
-	if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) {
+	if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) {
 		err = gen7_ctx_switch_bb_init(engine);
 		if (err)
 			goto err_ring_unpin;
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index a15f875396576d0a0e56a79068de2ec75d0758bd..62a5b0dd2003b76ece28cc0a12db1dd178ee8e07 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -217,6 +217,15 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 				  DDI_BUF_CTL_ENABLE);
 			vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE;
 		}
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+			~(PORTA_HOTPLUG_ENABLE | PORTA_HOTPLUG_STATUS_MASK);
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+			~(PORTB_HOTPLUG_ENABLE | PORTB_HOTPLUG_STATUS_MASK);
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+			~(PORTC_HOTPLUG_ENABLE | PORTC_HOTPLUG_STATUS_MASK);
+		/* No hpd_invert set in vgpu vbt, need to clear invert mask */
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= ~BXT_DDI_HPD_INVERT_MASK;
+		vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HOTPLUG_MASK;
 
 		vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1));
 		vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &=
@@ -273,6 +282,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |=
 				(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 				 TRANS_DDI_FUNC_ENABLE);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTA_HOTPLUG_ENABLE;
 			vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
 				GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
 		}
@@ -301,6 +312,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 				(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 				 (PORT_B << TRANS_DDI_PORT_SHIFT) |
 				 TRANS_DDI_FUNC_ENABLE);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTB_HOTPLUG_ENABLE;
 			vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
 				GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
 		}
@@ -329,6 +342,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 				(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 				 (PORT_B << TRANS_DDI_PORT_SHIFT) |
 				 TRANS_DDI_FUNC_ENABLE);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTC_HOTPLUG_ENABLE;
 			vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
 				GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
 		}
@@ -661,44 +676,62 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected)
 				PORTD_HOTPLUG_STATUS_MASK;
 		intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG);
 	} else if (IS_BROXTON(i915)) {
-		if (connected) {
-			if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+			if (connected) {
 				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
 					GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
+			} else {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
 			}
-			if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
-				vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
-					SFUSE_STRAP_DDIB_DETECTED;
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+				~PORTA_HOTPLUG_STATUS_MASK;
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTA_HOTPLUG_LONG_DETECT;
+			intel_vgpu_trigger_virtual_event(vgpu, DP_A_HOTPLUG);
+		}
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+			if (connected) {
 				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
 					GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
-			}
-			if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
 				vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
-					SFUSE_STRAP_DDIC_DETECTED;
-				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
-					GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
-			}
-		} else {
-			if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+					SFUSE_STRAP_DDIB_DETECTED;
+			} else {
 				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
-					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
-			}
-			if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
 				vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
 					~SFUSE_STRAP_DDIB_DETECTED;
-				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
-					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
 			}
-			if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
-				vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
-					~SFUSE_STRAP_DDIC_DETECTED;
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+				~PORTB_HOTPLUG_STATUS_MASK;
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTB_HOTPLUG_LONG_DETECT;
+			intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG);
+		}
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+			if (connected) {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+					GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
+				vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+					SFUSE_STRAP_DDIC_DETECTED;
+			} else {
 				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
 					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
+				vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+					~SFUSE_STRAP_DDIC_DETECTED;
 			}
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+				~PORTC_HOTPLUG_STATUS_MASK;
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTC_HOTPLUG_LONG_DETECT;
+			intel_vgpu_trigger_virtual_event(vgpu, DP_C_HOTPLUG);
 		}
-		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
-			PORTB_HOTPLUG_STATUS_MASK;
-		intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index e49944fde3339aafd16962ace9fab7337e0c0f6a..cbe5931906e0a8976aa7ecd4b0d820d05fd6ea42 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -437,10 +437,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_clean_sched_policy;
 
-	if (IS_BROADWELL(dev_priv))
+	if (IS_BROADWELL(dev_priv) || IS_BROXTON(dev_priv))
 		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B);
-	/* FixMe: Re-enable APL/BXT once vfio_edid enabled */
-	else if (!IS_BROXTON(dev_priv))
+	else
 		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D);
 	if (ret)
 		goto out_clean_sched_policy;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 88ad754962af1e3d2e7cc23176755f34b6029355..99eb0d7bbc447fb7b4c58798ce66e6752189bc18 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1047,6 +1047,8 @@ static void intel_shutdown_encoders(struct drm_i915_private *dev_priv)
 
 void i915_driver_shutdown(struct drm_i915_private *i915)
 {
+	disable_rpm_wakeref_asserts(&i915->runtime_pm);
+
 	i915_gem_suspend(i915);
 
 	drm_kms_helper_poll_disable(&i915->drm);
@@ -1060,6 +1062,8 @@ void i915_driver_shutdown(struct drm_i915_private *i915)
 
 	intel_suspend_encoders(i915);
 	intel_shutdown_encoders(i915);
+
+	enable_rpm_wakeref_asserts(&i915->runtime_pm);
 }
 
 static bool suspend_to_idle(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_mitigations.c b/drivers/gpu/drm/i915/i915_mitigations.c
new file mode 100644
index 0000000000000000000000000000000000000000..84f12598d145845bb9f7ef1f3a50d566ca6980d8
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_mitigations.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright Â© 2021 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "i915_drv.h"
+#include "i915_mitigations.h"
+
+static unsigned long mitigations __read_mostly = ~0UL;
+
+enum {
+	CLEAR_RESIDUALS = 0,
+};
+
+static const char * const names[] = {
+	[CLEAR_RESIDUALS] = "residuals",
+};
+
+bool i915_mitigate_clear_residuals(void)
+{
+	return READ_ONCE(mitigations) & BIT(CLEAR_RESIDUALS);
+}
+
+static int mitigations_set(const char *val, const struct kernel_param *kp)
+{
+	unsigned long new = ~0UL;
+	char *str, *sep, *tok;
+	bool first = true;
+	int err = 0;
+
+	BUILD_BUG_ON(ARRAY_SIZE(names) >= BITS_PER_TYPE(mitigations));
+
+	str = kstrdup(val, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	for (sep = str; (tok = strsep(&sep, ","));) {
+		bool enable = true;
+		int i;
+
+		/* Be tolerant of leading/trailing whitespace */
+		tok = strim(tok);
+
+		if (first) {
+			first = false;
+
+			if (!strcmp(tok, "auto"))
+				continue;
+
+			new = 0;
+			if (!strcmp(tok, "off"))
+				continue;
+		}
+
+		if (*tok == '!') {
+			enable = !enable;
+			tok++;
+		}
+
+		if (!strncmp(tok, "no", 2)) {
+			enable = !enable;
+			tok += 2;
+		}
+
+		if (*tok == '\0')
+			continue;
+
+		for (i = 0; i < ARRAY_SIZE(names); i++) {
+			if (!strcmp(tok, names[i])) {
+				if (enable)
+					new |= BIT(i);
+				else
+					new &= ~BIT(i);
+				break;
+			}
+		}
+		if (i == ARRAY_SIZE(names)) {
+			pr_err("Bad \"%s.mitigations=%s\", '%s' is unknown\n",
+			       DRIVER_NAME, val, tok);
+			err = -EINVAL;
+			break;
+		}
+	}
+	kfree(str);
+	if (err)
+		return err;
+
+	WRITE_ONCE(mitigations, new);
+	return 0;
+}
+
+static int mitigations_get(char *buffer, const struct kernel_param *kp)
+{
+	unsigned long local = READ_ONCE(mitigations);
+	int count, i;
+	bool enable;
+
+	if (!local)
+		return scnprintf(buffer, PAGE_SIZE, "%s\n", "off");
+
+	if (local & BIT(BITS_PER_LONG - 1)) {
+		count = scnprintf(buffer, PAGE_SIZE, "%s,", "auto");
+		enable = false;
+	} else {
+		enable = true;
+		count = 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(names); i++) {
+		if ((local & BIT(i)) != enable)
+			continue;
+
+		count += scnprintf(buffer + count, PAGE_SIZE - count,
+				   "%s%s,", enable ? "" : "!", names[i]);
+	}
+
+	buffer[count - 1] = '\n';
+	return count;
+}
+
+static const struct kernel_param_ops ops = {
+	.set = mitigations_set,
+	.get = mitigations_get,
+};
+
+module_param_cb_unsafe(mitigations, &ops, NULL, 0600);
+MODULE_PARM_DESC(mitigations,
+"Selectively enable security mitigations for all IntelÂ® GPUs in the system.\n"
+"\n"
+"  auto -- enables all mitigations required for the platform [default]\n"
+"  off  -- disables all mitigations\n"
+"\n"
+"Individual mitigations can be enabled by passing a comma-separated string,\n"
+"e.g. mitigations=residuals to enable only clearing residuals or\n"
+"mitigations=auto,noresiduals to disable only the clear residual mitigation.\n"
+"Either '!' or 'no' may be used to switch from enabling the mitigation to\n"
+"disabling it.\n"
+"\n"
+"Active mitigations for Ivybridge, Baytrail, Haswell:\n"
+"  residuals -- clear all thread-local registers between contexts"
+);
diff --git a/drivers/gpu/drm/i915/i915_mitigations.h b/drivers/gpu/drm/i915/i915_mitigations.h
new file mode 100644
index 0000000000000000000000000000000000000000..1359d8135287acc1a8cf0aa652f1145d371f9fb2
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_mitigations.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright Â© 2021 Intel Corporation
+ */
+
+#ifndef __I915_MITIGATIONS_H__
+#define __I915_MITIGATIONS_H__
+
+#include <linux/types.h>
+
+bool i915_mitigate_clear_residuals(void);
+
+#endif /* __I915_MITIGATIONS_H__ */
diff --git a/drivers/gpu/drm/nouveau/dispnv50/Kbuild b/drivers/gpu/drm/nouveau/dispnv50/Kbuild
index 6fdddb266fb1b22333780a3604c5f46825eeb81b..4488e1c061b3d94f3876ca5464d4d096c3698414 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/Kbuild
+++ b/drivers/gpu/drm/nouveau/dispnv50/Kbuild
@@ -37,6 +37,7 @@ nouveau-y += dispnv50/wimmc37b.o
 nouveau-y += dispnv50/wndw.o
 nouveau-y += dispnv50/wndwc37e.o
 nouveau-y += dispnv50/wndwc57e.o
+nouveau-y += dispnv50/wndwc67e.o
 
 nouveau-y += dispnv50/base.o
 nouveau-y += dispnv50/base507c.o
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c
index 27ea3f34706d4fbdc218ca04cdf339c28d99850b..abefc2343443bef6895f95a35ac08cde64d273ca 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/core.c
@@ -42,6 +42,7 @@ nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore)
 		int version;
 		int (*new)(struct nouveau_drm *, s32, struct nv50_core **);
 	} cores[] = {
+		{ GA102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new },
 		{ TU102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new },
 		{ GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new },
 		{ GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c
index 121c24a18f111ccee5d5240d0824fde6777139f6..31d8b2e4791dd0fbcaf75009eb3df76b80c24f9b 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/curs.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c
@@ -31,6 +31,7 @@ nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw)
 		int version;
 		int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **);
 	} curses[] = {
+		{ GA102_DISP_CURSOR, 0, cursc37a_new },
 		{ TU102_DISP_CURSOR, 0, cursc37a_new },
 		{ GV100_DISP_CURSOR, 0, cursc37a_new },
 		{ GK104_DISP_CURSOR, 0, curs907a_new },
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 33fff388dd83c2f259808e689117d532c582aebb..c6367035970eb5e6a14fcf4e4d3c531c28de91b7 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -222,7 +222,7 @@ nv50_dmac_wait(struct nvif_push *push, u32 size)
 
 int
 nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
-		 const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf,
+		 const s32 *oclass, u8 head, void *data, u32 size, s64 syncbuf,
 		 struct nv50_dmac *dmac)
 {
 	struct nouveau_cli *cli = (void *)device->object.client;
@@ -271,7 +271,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
 	if (ret)
 		return ret;
 
-	if (!syncbuf)
+	if (syncbuf < 0)
 		return 0;
 
 	ret = nvif_object_ctor(&dmac->base.user, "kmsSyncCtxDma", NV50_DISP_HANDLE_SYNCBUF,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h
index 92bddc0836171b853dec40cb09447da4e15259f9..38dec11e7dda5577fdd11263b8e9b8b8b2a44309 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h
@@ -95,7 +95,7 @@ struct nv50_outp_atom {
 
 int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
 		     const s32 *oclass, u8 head, void *data, u32 size,
-		     u64 syncbuf, struct nv50_dmac *dmac);
+		     s64 syncbuf, struct nv50_dmac *dmac);
 void nv50_dmac_destroy(struct nv50_dmac *);
 
 /*
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c
index a1ac153d5e9844fc35c76b292888ae8c17a752c9..566fbddfc8d7f47beffdb28ef5748a324fb555b6 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wimm.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c
@@ -31,6 +31,7 @@ nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw)
 		int version;
 		int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *);
 	} wimms[] = {
+		{ GA102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init },
 		{ TU102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init },
 		{ GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init },
 		{}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c
index 685b7087132426933adf63d9fdddf55fdc439995..b390029c69ec130872a1dc9c6373eba82dea47d5 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c
@@ -76,7 +76,7 @@ wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm,
 	int ret;
 
 	ret = nv50_dmac_create(&drm->client.device, &disp->disp->object,
-			       &oclass, 0, &args, sizeof(args), 0,
+			       &oclass, 0, &args, sizeof(args), -1,
 			       &wndw->wimm);
 	if (ret) {
 		NV_ERROR(drm, "wimm%04x allocation failed: %d\n", oclass, ret);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 0356474ad6f6afcd37e5c9540968d406fb5b41bc..ce451242f79eb2fc16403a5cf1d039b70ede0ced 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -784,6 +784,7 @@ nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index,
 		int (*new)(struct nouveau_drm *, enum drm_plane_type,
 			   int, s32, struct nv50_wndw **);
 	} wndws[] = {
+		{ GA102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc67e_new },
 		{ TU102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new },
 		{ GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new },
 		{}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
index 3278e28800343c37fe7a88a29df1d7bcee6cab41..f4e0c508003441eea6b190d77f5caf6b3afa258e 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
@@ -129,6 +129,14 @@ int wndwc37e_update(struct nv50_wndw *, u32 *);
 
 int wndwc57e_new(struct nouveau_drm *, enum drm_plane_type, int, s32,
 		 struct nv50_wndw **);
+bool wndwc57e_ilut(struct nv50_wndw *, struct nv50_wndw_atom *, int);
+int wndwc57e_ilut_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+int wndwc57e_ilut_clr(struct nv50_wndw *);
+int wndwc57e_csc_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+int wndwc57e_csc_clr(struct nv50_wndw *);
+
+int wndwc67e_new(struct nouveau_drm *, enum drm_plane_type, int, s32,
+		 struct nv50_wndw **);
 
 int nv50_wndw_new(struct nouveau_drm *, enum drm_plane_type, int index,
 		  struct nv50_wndw **);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
index 429be0bb022206678f4fd7d76e19cfeca18e916d..abdd3bb658b383e7fe4f28d315655dad31b93a13 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
@@ -80,7 +80,7 @@ wndwc57e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 	return 0;
 }
 
-static int
+int
 wndwc57e_csc_clr(struct nv50_wndw *wndw)
 {
 	struct nvif_push *push = wndw->wndw.push;
@@ -98,7 +98,7 @@ wndwc57e_csc_clr(struct nv50_wndw *wndw)
 	return 0;
 }
 
-static int
+int
 wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	struct nvif_push *push = wndw->wndw.push;
@@ -111,7 +111,7 @@ wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 	return 0;
 }
 
-static int
+int
 wndwc57e_ilut_clr(struct nv50_wndw *wndw)
 {
 	struct nvif_push *push = wndw->wndw.push;
@@ -124,7 +124,7 @@ wndwc57e_ilut_clr(struct nv50_wndw *wndw)
 	return 0;
 }
 
-static int
+int
 wndwc57e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	struct nvif_push *push = wndw->wndw.push;
@@ -179,7 +179,7 @@ wndwc57e_ilut_load(struct drm_color_lut *in, int size, void __iomem *mem)
 	writew(readw(mem - 4), mem + 4);
 }
 
-static bool
+bool
 wndwc57e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, int size)
 {
 	if (size = size ? size : 1024, size != 256 && size != 1024)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a370fa1df20c85a7d64393e9acaaf122da717ae
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wndw.h"
+#include "atom.h"
+
+#include <nvif/pushc37b.h>
+
+#include <nvhw/class/clc57e.h>
+
+static int
+wndwc67e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nvif_push *push = wndw->wndw.push;
+	int ret;
+
+	if ((ret = PUSH_WAIT(push, 17)))
+		return ret;
+
+	PUSH_MTHD(push, NVC57E, SET_PRESENT_CONTROL,
+		  NVVAL(NVC57E, SET_PRESENT_CONTROL, MIN_PRESENT_INTERVAL, asyw->image.interval) |
+		  NVVAL(NVC57E, SET_PRESENT_CONTROL, BEGIN_MODE, asyw->image.mode) |
+		  NVDEF(NVC57E, SET_PRESENT_CONTROL, TIMESTAMP_MODE, DISABLE));
+
+	PUSH_MTHD(push, NVC57E, SET_SIZE,
+		  NVVAL(NVC57E, SET_SIZE, WIDTH, asyw->image.w) |
+		  NVVAL(NVC57E, SET_SIZE, HEIGHT, asyw->image.h),
+
+				SET_STORAGE,
+		  NVVAL(NVC57E, SET_STORAGE, BLOCK_HEIGHT, asyw->image.blockh),
+
+				SET_PARAMS,
+		  NVVAL(NVC57E, SET_PARAMS, FORMAT, asyw->image.format) |
+		  NVDEF(NVC57E, SET_PARAMS, CLAMP_BEFORE_BLEND, DISABLE) |
+		  NVDEF(NVC57E, SET_PARAMS, SWAP_UV, DISABLE) |
+		  NVDEF(NVC57E, SET_PARAMS, FMT_ROUNDING_MODE, ROUND_TO_NEAREST),
+
+				SET_PLANAR_STORAGE(0),
+		  NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.blocks[0]) |
+		  NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.pitch[0] >> 6));
+
+	PUSH_MTHD(push, NVC57E, SET_CONTEXT_DMA_ISO(0), asyw->image.handle, 1);
+	PUSH_MTHD(push, NVC57E, SET_OFFSET(0), asyw->image.offset[0] >> 8);
+
+	PUSH_MTHD(push, NVC57E, SET_POINT_IN(0),
+		  NVVAL(NVC57E, SET_POINT_IN, X, asyw->state.src_x >> 16) |
+		  NVVAL(NVC57E, SET_POINT_IN, Y, asyw->state.src_y >> 16));
+
+	PUSH_MTHD(push, NVC57E, SET_SIZE_IN,
+		  NVVAL(NVC57E, SET_SIZE_IN, WIDTH, asyw->state.src_w >> 16) |
+		  NVVAL(NVC57E, SET_SIZE_IN, HEIGHT, asyw->state.src_h >> 16));
+
+	PUSH_MTHD(push, NVC57E, SET_SIZE_OUT,
+		  NVVAL(NVC57E, SET_SIZE_OUT, WIDTH, asyw->state.crtc_w) |
+		  NVVAL(NVC57E, SET_SIZE_OUT, HEIGHT, asyw->state.crtc_h));
+	return 0;
+}
+
+static const struct nv50_wndw_func
+wndwc67e = {
+	.acquire = wndwc37e_acquire,
+	.release = wndwc37e_release,
+	.sema_set = wndwc37e_sema_set,
+	.sema_clr = wndwc37e_sema_clr,
+	.ntfy_set = wndwc37e_ntfy_set,
+	.ntfy_clr = wndwc37e_ntfy_clr,
+	.ntfy_reset = corec37d_ntfy_init,
+	.ntfy_wait_begun = base507c_ntfy_wait_begun,
+	.ilut = wndwc57e_ilut,
+	.ilut_identity = true,
+	.ilut_size = 1024,
+	.xlut_set = wndwc57e_ilut_set,
+	.xlut_clr = wndwc57e_ilut_clr,
+	.csc = base907c_csc,
+	.csc_set = wndwc57e_csc_set,
+	.csc_clr = wndwc57e_csc_clr,
+	.image_set = wndwc67e_image_set,
+	.image_clr = wndwc37e_image_clr,
+	.blend_set = wndwc37e_blend_set,
+	.update = wndwc37e_update,
+};
+
+int
+wndwc67e_new(struct nouveau_drm *drm, enum drm_plane_type type, int index,
+	     s32 oclass, struct nv50_wndw **pwndw)
+{
+	return wndwc37e_new_(&wndwc67e, drm, type, index, oclass, BIT(index >> 1), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index cd9a2e687bb61ba96cfe347ef3b25cc9e8e78fb5..57d4f457a7d4ad03b4bd998a1ef1777923ebab83 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -33,6 +33,7 @@ struct nv_device_info_v0 {
 #define NV_DEVICE_INFO_V0_PASCAL                                           0x0a
 #define NV_DEVICE_INFO_V0_VOLTA                                            0x0b
 #define NV_DEVICE_INFO_V0_TURING                                           0x0c
+#define NV_DEVICE_INFO_V0_AMPERE                                           0x0d
 	__u8  family;
 	__u8  pad06[2];
 	__u64 ram_size;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 2c79beb41126fa75ff55c2479afdba27db841320..ba2c28ea43d20f46b6aca9a2e4e2798a50de45d0 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -88,6 +88,7 @@
 #define GP102_DISP                                    /* cl5070.h */ 0x00009870
 #define GV100_DISP                                    /* cl5070.h */ 0x0000c370
 #define TU102_DISP                                    /* cl5070.h */ 0x0000c570
+#define GA102_DISP                                    /* cl5070.h */ 0x0000c670
 
 #define GV100_DISP_CAPS                                              0x0000c373
 
@@ -103,6 +104,7 @@
 #define GK104_DISP_CURSOR                             /* cl507a.h */ 0x0000917a
 #define GV100_DISP_CURSOR                             /* cl507a.h */ 0x0000c37a
 #define TU102_DISP_CURSOR                             /* cl507a.h */ 0x0000c57a
+#define GA102_DISP_CURSOR                             /* cl507a.h */ 0x0000c67a
 
 #define NV50_DISP_OVERLAY                             /* cl507b.h */ 0x0000507b
 #define G82_DISP_OVERLAY                              /* cl507b.h */ 0x0000827b
@@ -112,6 +114,7 @@
 
 #define GV100_DISP_WINDOW_IMM_CHANNEL_DMA             /* clc37b.h */ 0x0000c37b
 #define TU102_DISP_WINDOW_IMM_CHANNEL_DMA             /* clc37b.h */ 0x0000c57b
+#define GA102_DISP_WINDOW_IMM_CHANNEL_DMA             /* clc37b.h */ 0x0000c67b
 
 #define NV50_DISP_BASE_CHANNEL_DMA                    /* cl507c.h */ 0x0000507c
 #define G82_DISP_BASE_CHANNEL_DMA                     /* cl507c.h */ 0x0000827c
@@ -135,6 +138,7 @@
 #define GP102_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
 #define GV100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000c37d
 #define TU102_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000c57d
+#define GA102_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000c67d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
@@ -145,6 +149,7 @@
 
 #define GV100_DISP_WINDOW_CHANNEL_DMA                 /* clc37e.h */ 0x0000c37e
 #define TU102_DISP_WINDOW_CHANNEL_DMA                 /* clc37e.h */ 0x0000c57e
+#define GA102_DISP_WINDOW_CHANNEL_DMA                 /* clc37e.h */ 0x0000c67e
 
 #define NV50_TESLA                                                   0x00005097
 #define G82_TESLA                                                    0x00008297
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 5c007ce62fc34e1a42f5d2be2d0da98106a76416..c920939a1467946c1c2281a1798d92dda5dc8623 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -120,6 +120,7 @@ struct nvkm_device {
 		GP100    = 0x130,
 		GV100    = 0x140,
 		TU100    = 0x160,
+		GA100    = 0x170,
 	} card_type;
 	u32 chipset;
 	u8  chiprev;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index 5a96c942d912f0c68edc9227fd7eab1ef08ab503..0f6fa6631a197aaa7f26479e5bf24192678a0091 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -37,4 +37,5 @@ int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int tu102_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int ga102_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
index 1a39e52e09e36e7a92f5c104509c20f9cf17a7b0..50cc7c05eac49c64587bc3c90b24994e82505566 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
@@ -32,4 +32,5 @@ int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int tu102_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
+int ga100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 34b56b10218a8f782ef0a581eabbbed9b3bf14ed..2ecd52aec1d121d050844e1e2e8b425c64c54aee 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -86,6 +86,8 @@ int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gp102_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gp10b_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gv100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int ga100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int ga102_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 
 #include <subdev/bios.h>
 #include <subdev/bios/ramcfg.h>
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h
index eaacf8d80527cfc080ab34fccd496f16090bc27e..cdcce5ece6ff5973426f791cb4f02fc27537a904 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h
@@ -37,4 +37,5 @@ int nv50_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **);
 int g94_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **);
 int gf119_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **);
 int gk104_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **);
+int ga102_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
index 81b977319640ae9d7331ea07c78c1e89a313edd6..640f649ce497e4a9c93f8fc5ad1940f4ba205125 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
@@ -92,6 +92,7 @@ int g94_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 int gf117_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 int gf119_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 int gk104_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
+int gk110_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 int gm200_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 
 static inline int
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 6641fe4c252c62bf8d2c4c42d97e2e64a984489c..e45ca458396709be315edb7cf88c36604db96dc4 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -32,4 +32,5 @@ int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gp10b_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int tu102_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
+int ga100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index c7a94c94dbf378289b6cbcbd76b51f8b47d18497..72f35a2babcb20eef0450df7f25f4ee71dc2d9f0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -256,6 +256,7 @@ nouveau_backlight_init(struct drm_connector *connector)
 	case NV_DEVICE_INFO_V0_PASCAL:
 	case NV_DEVICE_INFO_V0_VOLTA:
 	case NV_DEVICE_INFO_V0_TURING:
+	case NV_DEVICE_INFO_V0_AMPERE: //XXX: not confirmed
 		ret = nv50_backlight_init(nv_encoder, &props, &ops);
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c
index 8d0d30e08f57eff1842fc7fe1d98526ef14a392a..529cb60d5efb020faf0e4a81b45b5cd133fa5a6f 100644
--- a/drivers/gpu/drm/nouveau/nvif/disp.c
+++ b/drivers/gpu/drm/nouveau/nvif/disp.c
@@ -35,6 +35,7 @@ nvif_disp_ctor(struct nvif_device *device, const char *name, s32 oclass,
 	       struct nvif_disp *disp)
 {
 	static const struct nvif_mclass disps[] = {
+		{ GA102_DISP, -1 },
 		{ TU102_DISP, -1 },
 		{ GV100_DISP, -1 },
 		{ GP102_DISP, -1 },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 7851bec5f0e5feccd8b2be5f1fbcb2b6180a21bf..cdcc851e06f9b5d8e82341048f8e77e53f07363c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1815,7 +1815,7 @@ nvf0_chipset = {
 	.fb = gk110_fb_new,
 	.fuse = gf100_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gk104_i2c_new,
+	.i2c = gk110_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -1853,7 +1853,7 @@ nvf1_chipset = {
 	.fb = gk110_fb_new,
 	.fuse = gf100_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gk104_i2c_new,
+	.i2c = gk110_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -1891,7 +1891,7 @@ nv106_chipset = {
 	.fb = gk110_fb_new,
 	.fuse = gf100_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gk104_i2c_new,
+	.i2c = gk110_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -1929,7 +1929,7 @@ nv108_chipset = {
 	.fb = gk110_fb_new,
 	.fuse = gf100_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gk104_i2c_new,
+	.i2c = gk110_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -1967,7 +1967,7 @@ nv117_chipset = {
 	.fb = gm107_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gk104_i2c_new,
+	.i2c = gk110_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -2003,7 +2003,7 @@ nv118_chipset = {
 	.fb = gm107_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gk104_i2c_new,
+	.i2c = gk110_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -2652,6 +2652,61 @@ nv168_chipset = {
 	.sec2 = tu102_sec2_new,
 };
 
+static const struct nvkm_device_chip
+nv170_chipset = {
+	.name = "GA100",
+	.bar = tu102_bar_new,
+	.bios = nvkm_bios_new,
+	.devinit = ga100_devinit_new,
+	.fb = ga100_fb_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.mc = ga100_mc_new,
+	.mmu = tu102_mmu_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+};
+
+static const struct nvkm_device_chip
+nv172_chipset = {
+	.name = "GA102",
+	.bar = tu102_bar_new,
+	.bios = nvkm_bios_new,
+	.devinit = ga100_devinit_new,
+	.fb = ga102_fb_new,
+	.gpio = ga102_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.mc = ga100_mc_new,
+	.mmu = tu102_mmu_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.disp = ga102_disp_new,
+	.dma = gv100_dma_new,
+};
+
+static const struct nvkm_device_chip
+nv174_chipset = {
+	.name = "GA104",
+	.bar = tu102_bar_new,
+	.bios = nvkm_bios_new,
+	.devinit = ga100_devinit_new,
+	.fb = ga102_fb_new,
+	.gpio = ga102_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.mc = ga100_mc_new,
+	.mmu = tu102_mmu_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.disp = ga102_disp_new,
+	.dma = gv100_dma_new,
+};
+
 static int
 nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
 		       struct nvkm_notify *notify)
@@ -3063,6 +3118,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 			case 0x130: device->card_type = GP100; break;
 			case 0x140: device->card_type = GV100; break;
 			case 0x160: device->card_type = TU100; break;
+			case 0x170: device->card_type = GA100; break;
 			default:
 				break;
 			}
@@ -3160,10 +3216,23 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x166: device->chip = &nv166_chipset; break;
 		case 0x167: device->chip = &nv167_chipset; break;
 		case 0x168: device->chip = &nv168_chipset; break;
+		case 0x172: device->chip = &nv172_chipset; break;
+		case 0x174: device->chip = &nv174_chipset; break;
 		default:
-			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
-			ret = -ENODEV;
-			goto done;
+			if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) {
+				switch (device->chipset) {
+				case 0x170: device->chip = &nv170_chipset; break;
+				default:
+					break;
+				}
+			}
+
+			if (!device->chip) {
+				nvdev_error(device, "unknown chipset (%08x)\n", boot0);
+				ret = -ENODEV;
+				goto done;
+			}
+			break;
 		}
 
 		nvdev_info(device, "NVIDIA %s (%08x)\n",
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index 03c6d9aef075cca0b13770d25d941602bb7ef49f..1478947987860d664a89247afaecd8e7d46c9dc7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -176,6 +176,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size)
 	case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break;
 	case GV100: args->v0.family = NV_DEVICE_INFO_V0_VOLTA; break;
 	case TU100: args->v0.family = NV_DEVICE_INFO_V0_TURING; break;
+	case GA100: args->v0.family = NV_DEVICE_INFO_V0_AMPERE; break;
 	default:
 		args->v0.family = 0;
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index cf075311cdd27ae189b5c00cedc589f14b93f44e..b03f043efe261542c267a0a7a3ead68858c36f82 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -17,6 +17,7 @@ nvkm-y += nvkm/engine/disp/gp100.o
 nvkm-y += nvkm/engine/disp/gp102.o
 nvkm-y += nvkm/engine/disp/gv100.o
 nvkm-y += nvkm/engine/disp/tu102.o
+nvkm-y += nvkm/engine/disp/ga102.o
 nvkm-y += nvkm/engine/disp/vga.o
 
 nvkm-y += nvkm/engine/disp/head.o
@@ -42,6 +43,7 @@ nvkm-y += nvkm/engine/disp/sorgm200.o
 nvkm-y += nvkm/engine/disp/sorgp100.o
 nvkm-y += nvkm/engine/disp/sorgv100.o
 nvkm-y += nvkm/engine/disp/sortu102.o
+nvkm-y += nvkm/engine/disp/sorga102.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/dp.o
@@ -75,6 +77,7 @@ nvkm-y += nvkm/engine/disp/rootgp100.o
 nvkm-y += nvkm/engine/disp/rootgp102.o
 nvkm-y += nvkm/engine/disp/rootgv100.o
 nvkm-y += nvkm/engine/disp/roottu102.o
+nvkm-y += nvkm/engine/disp/rootga102.o
 
 nvkm-y += nvkm/engine/disp/capsgv100.o
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
index 3800aeb507d0167d4e7bb41210c6fb5fd925cdc2..55fbfe28c6dc1a50da06bc117a95cc91b303b4ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
@@ -33,6 +33,12 @@
 
 #include <nvif/event.h>
 
+/* IED scripts are no longer used by UEFI/RM from Ampere, but have been updated for
+ * the x86 option ROM.  However, the relevant VBIOS table versions weren't modified,
+ * so we're unable to detect this in a nice way.
+ */
+#define AMPERE_IED_HACK(disp) ((disp)->engine.subdev.device->card_type >= GA100)
+
 struct lt_state {
 	struct nvkm_dp *dp;
 	u8  stat[6];
@@ -238,6 +244,19 @@ nvkm_dp_train_links(struct nvkm_dp *dp)
 		dp->dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED;
 	lt.pc2 = dp->dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED;
 
+	if (AMPERE_IED_HACK(disp) && (lnkcmp = lt.dp->info.script[0])) {
+		/* Execute BeforeLinkTraining script from DP Info table. */
+		while (ior->dp.bw < nvbios_rd08(bios, lnkcmp))
+			lnkcmp += 3;
+		lnkcmp = nvbios_rd16(bios, lnkcmp + 1);
+
+		nvbios_init(&dp->outp.disp->engine.subdev, lnkcmp,
+			init.outp = &dp->outp.info;
+			init.or   = ior->id;
+			init.link = ior->asy.link;
+		);
+	}
+
 	/* Set desired link configuration on the source. */
 	if ((lnkcmp = lt.dp->info.lnkcmp)) {
 		if (dp->version < 0x30) {
@@ -316,12 +335,14 @@ nvkm_dp_train_init(struct nvkm_dp *dp)
 		);
 	}
 
-	/* Execute BeforeLinkTraining script from DP Info table. */
-	nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0],
-		init.outp = &dp->outp.info;
-		init.or   = dp->outp.ior->id;
-		init.link = dp->outp.ior->asy.link;
-	);
+	if (!AMPERE_IED_HACK(dp->outp.disp)) {
+		/* Execute BeforeLinkTraining script from DP Info table. */
+		nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0],
+			init.outp = &dp->outp.info;
+			init.or   = dp->outp.ior->id;
+			init.link = dp->outp.ior->asy.link;
+		);
+	}
 }
 
 static const struct dp_rates {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa2e5645fe365929b44849d8407d5443ad0103f7
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nv50.h"
+#include "head.h"
+#include "ior.h"
+#include "channv50.h"
+#include "rootnv50.h"
+
+static const struct nv50_disp_func
+ga102_disp = {
+	.init = tu102_disp_init,
+	.fini = gv100_disp_fini,
+	.intr = gv100_disp_intr,
+	.uevent = &gv100_disp_chan_uevent,
+	.super = gv100_disp_super,
+	.root = &ga102_disp_root_oclass,
+	.wndw = { .cnt = gv100_disp_wndw_cnt },
+	.head = { .cnt = gv100_head_cnt, .new = gv100_head_new },
+	.sor = { .cnt = gv100_sor_cnt, .new = ga102_sor_new },
+	.ramht_size = 0x2000,
+};
+
+int
+ga102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return nv50_disp_new_(&ga102_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
index 09f3038eff26fba9b411681dd66db46cf5f5273f..9f0bb7c6b0100f07d7d4b1dd36a94b5db91b072e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
@@ -150,6 +150,8 @@ void gv100_sor_dp_audio(struct nvkm_ior *, int, bool);
 void gv100_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32);
 void gv100_sor_dp_watermark(struct nvkm_ior *, int, u8);
 
+void tu102_sor_dp_vcpi(struct nvkm_ior *, int, u8, u8, u16, u16);
+
 void g84_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
 void gt215_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
 void gf119_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
@@ -207,4 +209,6 @@ int gv100_sor_cnt(struct nvkm_disp *, unsigned long *);
 int gv100_sor_new(struct nvkm_disp *, int);
 
 int tu102_sor_new(struct nvkm_disp *, int);
+
+int ga102_sor_new(struct nvkm_disp *, int);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
index a677161c7f3a6c77096a0dedc155e63b05a87cb1..db31b37752a270b163b9b1ea8366546bef2470fa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
@@ -86,6 +86,8 @@ void gv100_disp_intr(struct nv50_disp *);
 void gv100_disp_super(struct work_struct *);
 int gv100_disp_wndw_cnt(struct nvkm_disp *, unsigned long *);
 
+int tu102_disp_init(struct nv50_disp *);
+
 void nv50_disp_dptmds_war_2(struct nv50_disp *, struct dcb_output *);
 void nv50_disp_dptmds_war_3(struct nv50_disp *, struct dcb_output *);
 void nv50_disp_update_sppll1(struct nv50_disp *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c
new file mode 100644
index 0000000000000000000000000000000000000000..9af07c3cf9fc06998f10de5cff3ab322b6af782c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "rootnv50.h"
+#include "channv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+ga102_disp_root = {
+	.user = {
+		{{-1,-1,GV100_DISP_CAPS                }, gv100_disp_caps_new },
+		{{0,0,GA102_DISP_CURSOR                }, gv100_disp_curs_new },
+		{{0,0,GA102_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new },
+		{{0,0,GA102_DISP_CORE_CHANNEL_DMA      }, gv100_disp_core_new },
+		{{0,0,GA102_DISP_WINDOW_CHANNEL_DMA    }, gv100_disp_wndw_new },
+		{}
+	},
+};
+
+static int
+ga102_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&ga102_disp_root, disp, oclass, data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+ga102_disp_root_oclass = {
+	.base.oclass = GA102_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+	.ctor = ga102_disp_root_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index 7070f5408d92bf617746b5e7baa76b45eff8f825..27bb170d02930441d99b1f09b49335b74f44e482 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -41,4 +41,5 @@ extern const struct nvkm_disp_oclass gp100_disp_root_oclass;
 extern const struct nvkm_disp_oclass gp102_disp_root_oclass;
 extern const struct nvkm_disp_oclass gv100_disp_root_oclass;
 extern const struct nvkm_disp_oclass tu102_disp_root_oclass;
+extern const struct nvkm_disp_oclass ga102_disp_root_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c
new file mode 100644
index 0000000000000000000000000000000000000000..033827de91162cbaff4fd86c90bf170b84182698
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ior.h"
+
+#include <subdev/timer.h>
+
+static int
+ga102_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	const u32 soff = nv50_ior_base(sor);
+	const u32 loff = nv50_sor_link(sor);
+	u32 dpctrl = 0x00000000;
+	u32 clksor = 0x00000000;
+
+	switch (sor->dp.bw) {
+	case 0x06: clksor |= 0x00000000; break;
+	case 0x0a: clksor |= 0x00040000; break;
+	case 0x14: clksor |= 0x00080000; break;
+	case 0x1e: clksor |= 0x000c0000; break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	dpctrl |= ((1 << sor->dp.nr) - 1) << 16;
+	if (sor->dp.mst)
+		dpctrl |= 0x40000000;
+	if (sor->dp.ef)
+		dpctrl |= 0x00004000;
+
+	nvkm_mask(device, 0x612300 + soff, 0x007c0000, clksor);
+
+	/*XXX*/
+	nvkm_msec(device, 40, NVKM_DELAY);
+	nvkm_mask(device, 0x612300 + soff, 0x00030000, 0x00010000);
+	nvkm_mask(device, 0x61c10c + loff, 0x00000003, 0x00000001);
+
+	nvkm_mask(device, 0x61c10c + loff, 0x401f4000, dpctrl);
+	return 0;
+}
+
+static void
+ga102_sor_clock(struct nvkm_ior *sor)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	u32 div2 = 0;
+	if (sor->asy.proto == TMDS) {
+		if (sor->tmds.high_speed)
+			div2 = 1;
+	}
+	nvkm_wr32(device, 0x00ec08 + (sor->id * 0x10), 0x00000000);
+	nvkm_wr32(device, 0x00ec04 + (sor->id * 0x10), div2);
+}
+
+static const struct nvkm_ior_func
+ga102_sor_hda = {
+	.route = {
+		.get = gm200_sor_route_get,
+		.set = gm200_sor_route_set,
+	},
+	.state = gv100_sor_state,
+	.power = nv50_sor_power,
+	.clock = ga102_sor_clock,
+	.hdmi = {
+		.ctrl = gv100_hdmi_ctrl,
+		.scdc = gm200_hdmi_scdc,
+	},
+	.dp = {
+		.lanes = { 0, 1, 2, 3 },
+		.links = ga102_sor_dp_links,
+		.power = g94_sor_dp_power,
+		.pattern = gm107_sor_dp_pattern,
+		.drive = gm200_sor_dp_drive,
+		.vcpi = tu102_sor_dp_vcpi,
+		.audio = gv100_sor_dp_audio,
+		.audio_sym = gv100_sor_dp_audio_sym,
+		.watermark = gv100_sor_dp_watermark,
+	},
+	.hda = {
+		.hpd = gf119_hda_hpd,
+		.eld = gf119_hda_eld,
+		.device_entry = gv100_hda_device_entry,
+	},
+};
+
+static const struct nvkm_ior_func
+ga102_sor = {
+	.route = {
+		.get = gm200_sor_route_get,
+		.set = gm200_sor_route_set,
+	},
+	.state = gv100_sor_state,
+	.power = nv50_sor_power,
+	.clock = ga102_sor_clock,
+	.hdmi = {
+		.ctrl = gv100_hdmi_ctrl,
+		.scdc = gm200_hdmi_scdc,
+	},
+	.dp = {
+		.lanes = { 0, 1, 2, 3 },
+		.links = ga102_sor_dp_links,
+		.power = g94_sor_dp_power,
+		.pattern = gm107_sor_dp_pattern,
+		.drive = gm200_sor_dp_drive,
+		.vcpi = tu102_sor_dp_vcpi,
+		.audio = gv100_sor_dp_audio,
+		.audio_sym = gv100_sor_dp_audio_sym,
+		.watermark = gv100_sor_dp_watermark,
+	},
+};
+
+int
+ga102_sor_new(struct nvkm_disp *disp, int id)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	u32 hda = nvkm_rd32(device, 0x08a15c);
+	if (hda & BIT(id))
+		return nvkm_ior_new_(&ga102_sor_hda, disp, SOR, id);
+	return nvkm_ior_new_(&ga102_sor, disp, SOR, id);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c
index 59865a934c4b9d4e3e0c85ab4f47a0bd86b2073b..0cf9e8752d2588c83b146d3534907a7c7b7c10b6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c
@@ -23,7 +23,7 @@
 
 #include <subdev/timer.h>
 
-static void
+void
 tu102_sor_dp_vcpi(struct nvkm_ior *sor, int head,
 		  u8 slot, u8 slot_nr, u16 pbn, u16 aligned)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c
index 883ae4151ff8887074e63fec782a211b07a5fa94..4c85d1d4fbd4265cabd179923519412766473114 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c
@@ -28,7 +28,7 @@
 #include <core/gpuobj.h>
 #include <subdev/timer.h>
 
-static int
+int
 tu102_disp_init(struct nv50_disp *disp)
 {
 	struct nvkm_device *device = disp->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
index 7deb81b6dbac6bc5a440da281eca94b51b632f83..4b571cc6bc70f4cacb934cd731aed683ca8ec0f0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
@@ -75,7 +75,7 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd)
 	nvkm_debug(subdev, "%08x: type %02x, %d bytes\n",
 		   image.base, image.type, image.size);
 
-	if (!shadow_fetch(bios, mthd, image.size)) {
+	if (!shadow_fetch(bios, mthd, image.base + image.size)) {
 		nvkm_debug(subdev, "%08x: fetch failed\n", image.base);
 		return 0;
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c
index 3634cd0630b81ce4d697261ee0100f1ce4ec5b39..023ddc7c5399a40ffbb19da8fb14cf53a4f4871a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c
@@ -64,6 +64,9 @@ pramin_init(struct nvkm_bios *bios, const char *name)
 		return NULL;
 
 	/* we can't get the bios image pointer without PDISP */
+	if (device->card_type >= GA100)
+		addr = device->chipset == 0x170; /*XXX: find the fuse reg for this */
+	else
 	if (device->card_type >= GM100)
 		addr = nvkm_rd32(device, 0x021c04);
 	else
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
index b3429371ed824b7bd4b3043b0118accbcdf48655..d1abb64841dac7ad969e4ab1ff688c03c5060095 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
@@ -15,3 +15,4 @@ nvkm-y += nvkm/subdev/devinit/gm107.o
 nvkm-y += nvkm/subdev/devinit/gm200.o
 nvkm-y += nvkm/subdev/devinit/gv100.o
 nvkm-y += nvkm/subdev/devinit/tu102.o
+nvkm-y += nvkm/subdev/devinit/ga100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c
new file mode 100644
index 0000000000000000000000000000000000000000..636a92128f6c8cab99160cee3f8c392ceb0f11df
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nv50.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/pll.h>
+#include <subdev/clk/pll.h>
+
+static int
+ga100_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
+{
+	struct nvkm_subdev *subdev = &init->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvbios_pll info;
+	int head = type - PLL_VPLL0;
+	int N, fN, M, P;
+	int ret;
+
+	ret = nvbios_pll_parse(device->bios, type, &info);
+	if (ret)
+		return ret;
+
+	ret = gt215_pll_calc(subdev, &info, freq, &N, &fN, &M, &P);
+	if (ret < 0)
+		return ret;
+
+	switch (info.type) {
+	case PLL_VPLL0:
+	case PLL_VPLL1:
+	case PLL_VPLL2:
+	case PLL_VPLL3:
+		nvkm_wr32(device, 0x00ef00 + (head * 0x40), 0x02080004);
+		nvkm_wr32(device, 0x00ef18 + (head * 0x40), (N << 16) | fN);
+		nvkm_wr32(device, 0x00ef04 + (head * 0x40), (P << 16) | M);
+		nvkm_wr32(device, 0x00e9c0 + (head * 0x04), 0x00000001);
+		break;
+	default:
+		nvkm_warn(subdev, "%08x/%dKhz unimplemented\n", type, freq);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct nvkm_devinit_func
+ga100_devinit = {
+	.init = nv50_devinit_init,
+	.post = tu102_devinit_post,
+	.pll_set = ga100_devinit_pll_set,
+};
+
+int
+ga100_devinit_new(struct nvkm_device *device, int index, struct nvkm_devinit **pinit)
+{
+	return nv50_devinit_new_(&ga100_devinit, device, index, pinit);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h
index 94723352137a7f9730f9a87ead7dfbafb16a9ca9..05961e624264754078e6bc48bc669db3f978c54f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h
@@ -19,4 +19,5 @@ void nvkm_devinit_ctor(const struct nvkm_devinit_func *, struct nvkm_device *,
 		       int index, struct nvkm_devinit *);
 
 int nv04_devinit_post(struct nvkm_devinit *, bool);
+int tu102_devinit_post(struct nvkm_devinit *, bool);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
index 397670e72fff936a4e7d1812490198996e318093..9a469bf482f2f5bb13b270c625d9fc37b476df2c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
@@ -65,7 +65,7 @@ tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
 	return ret;
 }
 
-static int
+int
 tu102_devinit_post(struct nvkm_devinit *base, bool post)
 {
 	struct nv50_devinit *init = nv50_devinit(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index 43a42159a3d00e5fe432f997ba92565a612ec2f2..5d0bab8ecb433d60dd28306048f4f7ca656ea483 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -32,6 +32,8 @@ nvkm-y += nvkm/subdev/fb/gp100.o
 nvkm-y += nvkm/subdev/fb/gp102.o
 nvkm-y += nvkm/subdev/fb/gp10b.o
 nvkm-y += nvkm/subdev/fb/gv100.o
+nvkm-y += nvkm/subdev/fb/ga100.o
+nvkm-y += nvkm/subdev/fb/ga102.o
 
 nvkm-y += nvkm/subdev/fb/ram.o
 nvkm-y += nvkm/subdev/fb/ramnv04.o
@@ -52,6 +54,7 @@ nvkm-y += nvkm/subdev/fb/ramgk104.o
 nvkm-y += nvkm/subdev/fb/ramgm107.o
 nvkm-y += nvkm/subdev/fb/ramgm200.o
 nvkm-y += nvkm/subdev/fb/ramgp100.o
+nvkm-y += nvkm/subdev/fb/ramga102.o
 nvkm-y += nvkm/subdev/fb/sddr2.o
 nvkm-y += nvkm/subdev/fb/sddr3.o
 nvkm-y += nvkm/subdev/fb/gddr3.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
new file mode 100644
index 0000000000000000000000000000000000000000..bf82686851cd4a5b47e830448c49058ddf92c615
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ram.h"
+
+static const struct nvkm_fb_func
+ga100_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gv100_fb_init_page,
+	.init_unkn = gp100_fb_init_unkn,
+	.ram_new = gp100_ram_new,
+	.default_bigpage = 16,
+};
+
+int
+ga100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gp102_fb_new_(&ga100_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
new file mode 100644
index 0000000000000000000000000000000000000000..bcecf84a6e67995a50d527cf73dab04a77e67105
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ram.h"
+
+static const struct nvkm_fb_func
+ga102_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gv100_fb_init_page,
+	.init_unkn = gp100_fb_init_unkn,
+	.ram_new = ga102_ram_new,
+	.default_bigpage = 16,
+};
+
+int
+ga102_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gp102_fb_new_(&ga102_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
index 10ff5d053f7ea4e0f0645028906da0d6acc2a031..feda86a5fba8592000ac2657a815b72203cbaf91 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
@@ -22,7 +22,7 @@
 #include "gf100.h"
 #include "ram.h"
 
-static int
+int
 gv100_fb_init_page(struct nvkm_fb *fb)
 {
 	return (fb->page == 16) ? 0 : -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index 5be9c563350d7b684e4bdb3a4ca5f90805d09d28..66932ac10d15c68b65e80d982a90e54873e3d393 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -82,4 +82,6 @@ int gp102_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *, int,
 		  struct nvkm_fb **);
 bool gp102_fb_vpr_scrub_required(struct nvkm_fb *);
 int gp102_fb_vpr_scrub(struct nvkm_fb *);
+
+int gv100_fb_init_page(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
index d723a9b4e3c47006903c362b60f18da649b775b4..ea7d66f3dd825dba1f4eb6162e9a66e163d1677a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
@@ -70,4 +70,5 @@ int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gm200_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
+int ga102_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c
new file mode 100644
index 0000000000000000000000000000000000000000..298c136cefe0cec12539a1d6f4dd3aa32a0eeed1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ram.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/init.h>
+#include <subdev/bios/rammap.h>
+
+static const struct nvkm_ram_func
+ga102_ram = {
+};
+
+int
+ga102_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
+{
+	struct nvkm_device *device = fb->subdev.device;
+	enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
+	u32 size = nvkm_rd32(device, 0x1183a4);
+
+	return nvkm_ram_new_(&ga102_ram, fb, type, (u64)size << 20, pram);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild
index b2ad5922a1c2dedb83d140a2ae2f561a08932850..efbbaa080de5172c1ebd02a49182e77337d0a240 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild
@@ -5,3 +5,4 @@ nvkm-y += nvkm/subdev/gpio/nv50.o
 nvkm-y += nvkm/subdev/gpio/g94.o
 nvkm-y += nvkm/subdev/gpio/gf119.o
 nvkm-y += nvkm/subdev/gpio/gk104.o
+nvkm-y += nvkm/subdev/gpio/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c
new file mode 100644
index 0000000000000000000000000000000000000000..62c791baf4008dde51f3db198d96835117578cce
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+static void
+ga102_gpio_reset(struct nvkm_gpio *gpio, u8 match)
+{
+	struct nvkm_device *device = gpio->subdev.device;
+	struct nvkm_bios *bios = device->bios;
+	u8 ver, len;
+	u16 entry;
+	int ent = -1;
+
+	while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) {
+		u32 data = nvbios_rd32(bios, entry);
+		u8  line =   (data & 0x0000003f);
+		u8  defs = !!(data & 0x00000080);
+		u8  func =   (data & 0x0000ff00) >> 8;
+		u8  unk0 =   (data & 0x00ff0000) >> 16;
+		u8  unk1 =   (data & 0x1f000000) >> 24;
+
+		if ( func  == DCB_GPIO_UNUSED ||
+		    (match != DCB_GPIO_UNUSED && match != func))
+			continue;
+
+		nvkm_gpio_set(gpio, 0, func, line, defs);
+
+		nvkm_mask(device, 0x021200 + (line * 4), 0xff, unk0);
+		if (unk1--)
+			nvkm_mask(device, 0x00d740 + (unk1 * 4), 0xff, line);
+	}
+}
+
+static int
+ga102_gpio_drive(struct nvkm_gpio *gpio, int line, int dir, int out)
+{
+	struct nvkm_device *device = gpio->subdev.device;
+	u32 data = ((dir ^ 1) << 13) | (out << 12);
+	nvkm_mask(device, 0x021200 + (line * 4), 0x00003000, data);
+	nvkm_mask(device, 0x00d604, 0x00000001, 0x00000001); /* update? */
+	return 0;
+}
+
+static int
+ga102_gpio_sense(struct nvkm_gpio *gpio, int line)
+{
+	struct nvkm_device *device = gpio->subdev.device;
+	return !!(nvkm_rd32(device, 0x021200 + (line * 4)) & 0x00004000);
+}
+
+static void
+ga102_gpio_intr_stat(struct nvkm_gpio *gpio, u32 *hi, u32 *lo)
+{
+	struct nvkm_device *device = gpio->subdev.device;
+	u32 intr0 = nvkm_rd32(device, 0x021640);
+	u32 intr1 = nvkm_rd32(device, 0x02164c);
+	u32 stat0 = nvkm_rd32(device, 0x021648) & intr0;
+	u32 stat1 = nvkm_rd32(device, 0x021654) & intr1;
+	*lo = (stat1 & 0xffff0000) | (stat0 >> 16);
+	*hi = (stat1 << 16) | (stat0 & 0x0000ffff);
+	nvkm_wr32(device, 0x021640, intr0);
+	nvkm_wr32(device, 0x02164c, intr1);
+}
+
+static void
+ga102_gpio_intr_mask(struct nvkm_gpio *gpio, u32 type, u32 mask, u32 data)
+{
+	struct nvkm_device *device = gpio->subdev.device;
+	u32 inte0 = nvkm_rd32(device, 0x021648);
+	u32 inte1 = nvkm_rd32(device, 0x021654);
+	if (type & NVKM_GPIO_LO)
+		inte0 = (inte0 & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte0 = (inte0 & ~(mask & 0xffff)) | (data & 0xffff);
+	mask >>= 16;
+	data >>= 16;
+	if (type & NVKM_GPIO_LO)
+		inte1 = (inte1 & ~(mask << 16)) | (data << 16);
+	if (type & NVKM_GPIO_HI)
+		inte1 = (inte1 & ~mask) | data;
+	nvkm_wr32(device, 0x021648, inte0);
+	nvkm_wr32(device, 0x021654, inte1);
+}
+
+static const struct nvkm_gpio_func
+ga102_gpio = {
+	.lines = 32,
+	.intr_stat = ga102_gpio_intr_stat,
+	.intr_mask = ga102_gpio_intr_mask,
+	.drive = ga102_gpio_drive,
+	.sense = ga102_gpio_sense,
+	.reset = ga102_gpio_reset,
+};
+
+int
+ga102_gpio_new(struct nvkm_device *device, int index, struct nvkm_gpio **pgpio)
+{
+	return nvkm_gpio_new_(&ga102_gpio, device, index, pgpio);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild
index 723d0284caefc7d19757861a16e816c5e7913aff..819703913a00c4db2f0b00c048686acb9393e602 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild
@@ -7,6 +7,7 @@ nvkm-y += nvkm/subdev/i2c/g94.o
 nvkm-y += nvkm/subdev/i2c/gf117.o
 nvkm-y += nvkm/subdev/i2c/gf119.o
 nvkm-y += nvkm/subdev/i2c/gk104.o
+nvkm-y += nvkm/subdev/i2c/gk110.o
 nvkm-y += nvkm/subdev/i2c/gm200.o
 
 nvkm-y += nvkm/subdev/i2c/pad.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h
index 30b48896965ebc981c838087357dbfcc408ef054..f920eabf8628dc5db7219d20cb1829fda2cd5ccb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h
@@ -3,6 +3,13 @@
 #define __NVKM_I2C_AUX_H__
 #include "pad.h"
 
+static inline void
+nvkm_i2c_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable)
+{
+	if (i2c->func->aux_autodpcd)
+		i2c->func->aux_autodpcd(i2c, aux, false);
+}
+
 struct nvkm_i2c_aux_func {
 	bool address_only;
 	int  (*xfer)(struct nvkm_i2c_aux *, bool retry, u8 type,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c
index db7769cb33ebadfa10078c05f019c35cd4680338..47068f6f9c55d439eb0397d1fa254659776807a0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c
@@ -77,7 +77,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 		 u8 type, u32 addr, u8 *data, u8 *size)
 {
 	struct g94_i2c_aux *aux = g94_i2c_aux(obj);
-	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
+	struct nvkm_i2c *i2c = aux->base.pad->i2c;
+	struct nvkm_device *device = i2c->subdev.device;
 	const u32 base = aux->ch * 0x50;
 	u32 ctrl, stat, timeout, retries = 0;
 	u32 xbuf[4] = {};
@@ -96,6 +97,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 		goto out;
 	}
 
+	nvkm_i2c_aux_autodpcd(i2c, aux->ch, false);
+
 	if (!(type & 1)) {
 		memcpy(xbuf, data, *size);
 		for (i = 0; i < 16; i += 4) {
@@ -128,7 +131,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 			if (!timeout--) {
 				AUX_ERR(&aux->base, "timeout %08x", ctrl);
 				ret = -EIO;
-				goto out;
+				goto out_err;
 			}
 		} while (ctrl & 0x00010000);
 		ret = 0;
@@ -154,7 +157,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 		memcpy(data, xbuf, *size);
 		*size = stat & 0x0000001f;
 	}
-
+out_err:
+	nvkm_i2c_aux_autodpcd(i2c, aux->ch, true);
 out:
 	g94_i2c_aux_fini(aux);
 	return ret < 0 ? ret : (stat & 0x000f0000) >> 16;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
index edb6148cbca042c544939adb2cae2588e90a3e0e..8bd1d442e4654a0f4ff0f75c9c485e64d7562a9d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
@@ -33,7 +33,7 @@ static void
 gm200_i2c_aux_fini(struct gm200_i2c_aux *aux)
 {
 	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
-	nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00310000, 0x00000000);
+	nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00710000, 0x00000000);
 }
 
 static int
@@ -54,10 +54,10 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux)
 			AUX_ERR(&aux->base, "begin idle timeout %08x", ctrl);
 			return -EBUSY;
 		}
-	} while (ctrl & 0x03010000);
+	} while (ctrl & 0x07010000);
 
 	/* set some magic, and wait up to 1ms for it to appear */
-	nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00300000, ureq);
+	nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00700000, ureq);
 	timeout = 1000;
 	do {
 		ctrl = nvkm_rd32(device, 0x00d954 + (aux->ch * 0x50));
@@ -67,7 +67,7 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux)
 			gm200_i2c_aux_fini(aux);
 			return -EBUSY;
 		}
-	} while ((ctrl & 0x03000000) != urep);
+	} while ((ctrl & 0x07000000) != urep);
 
 	return 0;
 }
@@ -77,7 +77,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 		   u8 type, u32 addr, u8 *data, u8 *size)
 {
 	struct gm200_i2c_aux *aux = gm200_i2c_aux(obj);
-	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
+	struct nvkm_i2c *i2c = aux->base.pad->i2c;
+	struct nvkm_device *device = i2c->subdev.device;
 	const u32 base = aux->ch * 0x50;
 	u32 ctrl, stat, timeout, retries = 0;
 	u32 xbuf[4] = {};
@@ -96,6 +97,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 		goto out;
 	}
 
+	nvkm_i2c_aux_autodpcd(i2c, aux->ch, false);
+
 	if (!(type & 1)) {
 		memcpy(xbuf, data, *size);
 		for (i = 0; i < 16; i += 4) {
@@ -128,7 +131,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 			if (!timeout--) {
 				AUX_ERR(&aux->base, "timeout %08x", ctrl);
 				ret = -EIO;
-				goto out;
+				goto out_err;
 			}
 		} while (ctrl & 0x00010000);
 		ret = 0;
@@ -155,6 +158,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 		*size = stat & 0x0000001f;
 	}
 
+out_err:
+	nvkm_i2c_aux_autodpcd(i2c, aux->ch, true);
 out:
 	gm200_i2c_aux_fini(aux);
 	return ret < 0 ? ret : (stat & 0x000f0000) >> 16;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c
new file mode 100644
index 0000000000000000000000000000000000000000..8e3bfa1af52a2d3c09211d3e7382aa24f2deeda4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+#include "pad.h"
+
+static void
+gk110_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable)
+{
+	nvkm_mask(i2c->subdev.device, 0x00e4f8 + (aux * 0x50), 0x00010000, enable << 16);
+}
+
+static const struct nvkm_i2c_func
+gk110_i2c = {
+	.pad_x_new = gf119_i2c_pad_x_new,
+	.pad_s_new = gf119_i2c_pad_s_new,
+	.aux = 4,
+	.aux_stat = gk104_aux_stat,
+	.aux_mask = gk104_aux_mask,
+	.aux_autodpcd = gk110_aux_autodpcd,
+};
+
+int
+gk110_i2c_new(struct nvkm_device *device, int index, struct nvkm_i2c **pi2c)
+{
+	return nvkm_i2c_new_(&gk110_i2c, device, index, pi2c);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c
index a23c5f315221cb68d0230bff8095148a1a4604b0..7b2375bff8a9cc8cc9e43ed0c945db02a8fe75fe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c
@@ -24,6 +24,12 @@
 #include "priv.h"
 #include "pad.h"
 
+static void
+gm200_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable)
+{
+	nvkm_mask(i2c->subdev.device, 0x00d968 + (aux * 0x50), 0x00010000, enable << 16);
+}
+
 static const struct nvkm_i2c_func
 gm200_i2c = {
 	.pad_x_new = gf119_i2c_pad_x_new,
@@ -31,6 +37,7 @@ gm200_i2c = {
 	.aux = 8,
 	.aux_stat = gk104_aux_stat,
 	.aux_mask = gk104_aux_mask,
+	.aux_autodpcd = gm200_aux_autodpcd,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h
index 461016814f4f272205386e4286b2e20befc8b920..44b7bb7d4777650ce3730868007750f6668dbd08 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: MIT */
 #ifndef __NVKM_I2C_PAD_H__
 #define __NVKM_I2C_PAD_H__
-#include <subdev/i2c.h>
+#include "priv.h"
 
 struct nvkm_i2c_pad {
 	const struct nvkm_i2c_pad_func *func;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h
index bd86bc298ebe54e87bd6cda6fb122f663c244f21..e35f6036fcfcb85edd2d652cec50e858d72a4dfb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h
@@ -23,6 +23,10 @@ struct nvkm_i2c_func {
 	/* mask on/off interrupt types for a given set of auxch
 	 */
 	void (*aux_mask)(struct nvkm_i2c *, u32, u32, u32);
+
+	/* enable/disable HW-initiated DPCD reads
+	 */
+	void (*aux_autodpcd)(struct nvkm_i2c *, int aux, bool enable);
 };
 
 void g94_aux_stat(struct nvkm_i2c *, u32 *, u32 *, u32 *, u32 *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
index 2340040942c937c20dddd4bfd26a686df360596a..1115376bc85f5fd5d28e5cb37185721d73cf5e7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
@@ -22,6 +22,7 @@
  * Authors: Ben Skeggs
  */
 #include "priv.h"
+#include <subdev/timer.h>
 
 static void
 gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i)
@@ -31,7 +32,6 @@ gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i)
 	u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0400));
 	u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0400));
 	nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat);
-	nvkm_mask(device, 0x122128 + (i * 0x0400), 0x00000200, 0x00000000);
 }
 
 static void
@@ -42,7 +42,6 @@ gf100_ibus_intr_rop(struct nvkm_subdev *ibus, int i)
 	u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0400));
 	u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0400));
 	nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat);
-	nvkm_mask(device, 0x124128 + (i * 0x0400), 0x00000200, 0x00000000);
 }
 
 static void
@@ -53,7 +52,6 @@ gf100_ibus_intr_gpc(struct nvkm_subdev *ibus, int i)
 	u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0400));
 	u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0400));
 	nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat);
-	nvkm_mask(device, 0x128128 + (i * 0x0400), 0x00000200, 0x00000000);
 }
 
 void
@@ -90,6 +88,12 @@ gf100_ibus_intr(struct nvkm_subdev *ibus)
 			intr1 &= ~stat;
 		}
 	}
+
+	nvkm_mask(device, 0x121c4c, 0x0000003f, 0x00000002);
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x121c4c) & 0x0000003f))
+			break;
+	);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c
index f3915f85838ed400e4dc3d4a5bc5b36f12b5591f..22e487b493ad1346851a58c02599ceb97011a798 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c
@@ -22,6 +22,7 @@
  * Authors: Ben Skeggs
  */
 #include "priv.h"
+#include <subdev/timer.h>
 
 static void
 gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i)
@@ -31,7 +32,6 @@ gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i)
 	u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0800));
 	u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0800));
 	nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat);
-	nvkm_mask(device, 0x122128 + (i * 0x0800), 0x00000200, 0x00000000);
 }
 
 static void
@@ -42,7 +42,6 @@ gk104_ibus_intr_rop(struct nvkm_subdev *ibus, int i)
 	u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0800));
 	u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0800));
 	nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat);
-	nvkm_mask(device, 0x124128 + (i * 0x0800), 0x00000200, 0x00000000);
 }
 
 static void
@@ -53,7 +52,6 @@ gk104_ibus_intr_gpc(struct nvkm_subdev *ibus, int i)
 	u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0800));
 	u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0800));
 	nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat);
-	nvkm_mask(device, 0x128128 + (i * 0x0800), 0x00000200, 0x00000000);
 }
 
 void
@@ -90,6 +88,12 @@ gk104_ibus_intr(struct nvkm_subdev *ibus)
 			intr1 &= ~stat;
 		}
 	}
+
+	nvkm_mask(device, 0x12004c, 0x0000003f, 0x00000002);
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x12004c) & 0x0000003f))
+			break;
+	);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
index 2585ef07532ac094a2f888ffb38d503a846572b3..ac2b34e9ac6add2ca24bff686f8799bcf39ff7ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
@@ -14,3 +14,4 @@ nvkm-y += nvkm/subdev/mc/gk20a.o
 nvkm-y += nvkm/subdev/mc/gp100.o
 nvkm-y += nvkm/subdev/mc/gp10b.o
 nvkm-y += nvkm/subdev/mc/tu102.o
+nvkm-y += nvkm/subdev/mc/ga100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c
new file mode 100644
index 0000000000000000000000000000000000000000..967eb3af11eb00efd775b15d973f551a9e427876
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+static void
+ga100_mc_intr_unarm(struct nvkm_mc *mc)
+{
+	nvkm_wr32(mc->subdev.device, 0xb81610, 0x00000004);
+}
+
+static void
+ga100_mc_intr_rearm(struct nvkm_mc *mc)
+{
+	nvkm_wr32(mc->subdev.device, 0xb81608, 0x00000004);
+}
+
+static void
+ga100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 intr)
+{
+	nvkm_wr32(mc->subdev.device, 0xb81210,          mask & intr );
+	nvkm_wr32(mc->subdev.device, 0xb81410, mask & ~(mask & intr));
+}
+
+static u32
+ga100_mc_intr_stat(struct nvkm_mc *mc)
+{
+	u32 intr_top = nvkm_rd32(mc->subdev.device, 0xb81600), intr = 0x00000000;
+	if (intr_top & 0x00000004)
+		intr = nvkm_mask(mc->subdev.device, 0xb81010, 0x00000000, 0x00000000);
+	return intr;
+}
+
+static void
+ga100_mc_init(struct nvkm_mc *mc)
+{
+	nv50_mc_init(mc);
+	nvkm_wr32(mc->subdev.device, 0xb81210, 0xffffffff);
+}
+
+static const struct nvkm_mc_func
+ga100_mc = {
+	.init = ga100_mc_init,
+	.intr = gp100_mc_intr,
+	.intr_unarm = ga100_mc_intr_unarm,
+	.intr_rearm = ga100_mc_intr_rearm,
+	.intr_mask = ga100_mc_intr_mask,
+	.intr_stat = ga100_mc_intr_stat,
+	.reset = gk104_mc_reset,
+};
+
+int
+ga100_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc)
+{
+	return nvkm_mc_new_(&ga100_mc, device, index, pmc);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
index de91e9a2617258cbcc93e7ea027a9d4a77a1f99f..6d5212ae2fd57b741715e7d7c9e4c83fe5bdd13e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
@@ -316,9 +316,9 @@ nvkm_mmu_vram(struct nvkm_mmu *mmu)
 {
 	struct nvkm_device *device = mmu->subdev.device;
 	struct nvkm_mm *mm = &device->fb->ram->vram;
-	const u32 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL);
-	const u32 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP);
-	const u32 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED);
+	const u64 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL);
+	const u64 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP);
+	const u64 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED);
 	u8 type = NVKM_MEM_KIND * !!mmu->func->kind;
 	u8 heap = NVKM_MEM_VRAM;
 	int heapM, heapN, heapU;
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index a00b7ab9c14cfca59adbd7b8545ebb1a14d29d60..8cd776adc592be46dff106dc8fe25b1219015502 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -66,7 +66,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
 static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
 static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
 
-static spinlock_t shrinker_lock;
+static struct mutex shrinker_lock;
 static struct list_head shrinker_list;
 static struct shrinker mm_shrinker;
 
@@ -190,7 +190,7 @@ static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
 		size_t size = (1ULL << order) * PAGE_SIZE;
 
 		addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(pool->dev, **dma_addr))
+		if (dma_mapping_error(pool->dev, addr))
 			return -EFAULT;
 	}
 
@@ -249,9 +249,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
 	spin_lock_init(&pt->lock);
 	INIT_LIST_HEAD(&pt->pages);
 
-	spin_lock(&shrinker_lock);
+	mutex_lock(&shrinker_lock);
 	list_add_tail(&pt->shrinker_list, &shrinker_list);
-	spin_unlock(&shrinker_lock);
+	mutex_unlock(&shrinker_lock);
 }
 
 /* Remove a pool_type from the global shrinker list and free all pages */
@@ -259,9 +259,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
 {
 	struct page *p, *tmp;
 
-	spin_lock(&shrinker_lock);
+	mutex_lock(&shrinker_lock);
 	list_del(&pt->shrinker_list);
-	spin_unlock(&shrinker_lock);
+	mutex_unlock(&shrinker_lock);
 
 	list_for_each_entry_safe(p, tmp, &pt->pages, lru)
 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
@@ -302,7 +302,7 @@ static unsigned int ttm_pool_shrink(void)
 	unsigned int num_freed;
 	struct page *p;
 
-	spin_lock(&shrinker_lock);
+	mutex_lock(&shrinker_lock);
 	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
 
 	p = ttm_pool_type_take(pt);
@@ -314,7 +314,7 @@ static unsigned int ttm_pool_shrink(void)
 	}
 
 	list_move_tail(&pt->shrinker_list, &shrinker_list);
-	spin_unlock(&shrinker_lock);
+	mutex_unlock(&shrinker_lock);
 
 	return num_freed;
 }
@@ -564,7 +564,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 {
 	unsigned int i;
 
-	spin_lock(&shrinker_lock);
+	mutex_lock(&shrinker_lock);
 
 	seq_puts(m, "\t ");
 	for (i = 0; i < MAX_ORDER; ++i)
@@ -600,7 +600,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
 	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
 		   atomic_long_read(&allocated_pages), page_pool_size);
 
-	spin_unlock(&shrinker_lock);
+	mutex_unlock(&shrinker_lock);
 
 	return 0;
 }
@@ -644,7 +644,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 	if (!page_pool_size)
 		page_pool_size = num_pages;
 
-	spin_lock_init(&shrinker_lock);
+	mutex_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
 	for (i = 0; i < MAX_ORDER; ++i) {
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 7f70e5a7de10543318f1afe420193e8f42084c59..97a77ea8d3c9c1048b27dd90b68e3b90b3809c33 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -131,8 +131,10 @@ static ssize_t default_roce_mode_store(struct config_item *item,
 		return ret;
 
 	gid_type = ib_cache_gid_parse_type_str(buf);
-	if (gid_type < 0)
+	if (gid_type < 0) {
+		cma_configfs_params_put(cma_dev);
 		return -EINVAL;
+	}
 
 	ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
 
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index e0a41c8670023a4ef2df594248353fbfef4e28cb..ff1551b3cf619862d66077b8a7b6f12d3175147b 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -254,6 +254,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
 	} else {
 		ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
 				      &rt->next_id, GFP_KERNEL);
+		ret = (ret < 0) ? ret : 0;
 	}
 
 out:
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 7dab9a27a145a8eee04150c3caa11f2988bae584..da2512c30ffd5276c02e651638c3db89d403287f 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -95,8 +95,6 @@ struct ucma_context {
 	u64			uid;
 
 	struct list_head	list;
-	/* sync between removal event and id destroy, protected by file mut */
-	int			destroying;
 	struct work_struct	close_work;
 };
 
@@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table);
 static DEFINE_XARRAY_ALLOC(multicast_table);
 
 static const struct file_operations ucma_fops;
-static int __destroy_id(struct ucma_context *ctx);
+static int ucma_destroy_private_ctx(struct ucma_context *ctx);
 
 static inline struct ucma_context *_ucma_find_context(int id,
 						      struct ucma_file *file)
@@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work)
 
 	/* once all inflight tasks are finished, we close all underlying
 	 * resources. The context is still alive till its explicit destryoing
-	 * by its creator.
+	 * by its creator. This puts back the xarray's reference.
 	 */
 	ucma_put_ctx(ctx);
 	wait_for_completion(&ctx->comp);
 	/* No new events will be generated after destroying the id. */
 	rdma_destroy_id(ctx->cm_id);
 
-	/*
-	 * At this point ctx->ref is zero so the only place the ctx can be is in
-	 * a uevent or in __destroy_id(). Since the former doesn't touch
-	 * ctx->cm_id and the latter sync cancels this, there is no races with
-	 * this store.
-	 */
+	/* Reading the cm_id without holding a positive ref is not allowed */
 	ctx->cm_id = NULL;
 }
 
@@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 		return NULL;
 
 	INIT_WORK(&ctx->close_work, ucma_close_id);
-	refcount_set(&ctx->ref, 1);
 	init_completion(&ctx->comp);
 	/* So list_del() will work if we don't do ucma_finish_ctx() */
 	INIT_LIST_HEAD(&ctx->list);
@@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 	return ctx;
 }
 
+static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
+			       struct rdma_cm_id *cm_id)
+{
+	refcount_set(&ctx->ref, 1);
+	ctx->cm_id = cm_id;
+}
+
 static void ucma_finish_ctx(struct ucma_context *ctx)
 {
 	lockdep_assert_held(&ctx->file->mut);
@@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
 	ctx = ucma_alloc_ctx(listen_ctx->file);
 	if (!ctx)
 		goto err_backlog;
-	ctx->cm_id = cm_id;
+	ucma_set_ctx_cm_id(ctx, cm_id);
 
 	uevent = ucma_create_uevent(listen_ctx, event);
 	if (!uevent)
@@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
 	return 0;
 
 err_alloc:
-	xa_erase(&ctx_table, ctx->id);
-	kfree(ctx);
+	ucma_destroy_private_ctx(ctx);
 err_backlog:
 	atomic_inc(&listen_ctx->backlog);
 	/* Returning error causes the new ID to be destroyed */
@@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 		wake_up_interruptible(&ctx->file->poll_wait);
 	}
 
-	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
-		queue_work(system_unbound_wq, &ctx->close_work);
+	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
+		xa_lock(&ctx_table);
+		if (xa_load(&ctx_table, ctx->id) == ctx)
+			queue_work(system_unbound_wq, &ctx->close_work);
+		xa_unlock(&ctx_table);
+	}
 	return 0;
 }
 
@@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 		ret = PTR_ERR(cm_id);
 		goto err1;
 	}
-	ctx->cm_id = cm_id;
+	ucma_set_ctx_cm_id(ctx, cm_id);
 
 	resp.id = ctx->id;
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &resp, sizeof(resp))) {
-		xa_erase(&ctx_table, ctx->id);
-		__destroy_id(ctx);
+		ucma_destroy_private_ctx(ctx);
 		return -EFAULT;
 	}
 
@@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 	return 0;
 
 err1:
-	xa_erase(&ctx_table, ctx->id);
-	kfree(ctx);
+	ucma_destroy_private_ctx(ctx);
 	return ret;
 }
 
@@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 	rdma_unlock_handler(mc->ctx->cm_id);
 }
 
-/*
- * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
- * this point, no new events will be reported from the hardware. However, we
- * still need to cleanup the UCMA context for this ID. Specifically, there
- * might be events that have not yet been consumed by the user space software.
- * mutex. After that we release them as needed.
- */
-static int ucma_free_ctx(struct ucma_context *ctx)
+static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
 {
 	int events_reported;
 	struct ucma_event *uevent, *tmp;
 	LIST_HEAD(list);
 
-	ucma_cleanup_multicast(ctx);
-
-	/* Cleanup events not yet reported to the user. */
+	/* Cleanup events not yet reported to the user.*/
 	mutex_lock(&ctx->file->mut);
 	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
-		if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx)
+		if (uevent->ctx != ctx)
+			continue;
+
+		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+		    xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
+			       uevent->conn_req_ctx, XA_ZERO_ENTRY,
+			       GFP_KERNEL) == uevent->conn_req_ctx) {
 			list_move_tail(&uevent->list, &list);
+			continue;
+		}
+		list_del(&uevent->list);
+		kfree(uevent);
 	}
 	list_del(&ctx->list);
 	events_reported = ctx->events_reported;
 	mutex_unlock(&ctx->file->mut);
 
 	/*
-	 * If this was a listening ID then any connections spawned from it
-	 * that have not been delivered to userspace are cleaned up too.
-	 * Must be done outside any locks.
+	 * If this was a listening ID then any connections spawned from it that
+	 * have not been delivered to userspace are cleaned up too. Must be done
+	 * outside any locks.
 	 */
 	list_for_each_entry_safe(uevent, tmp, &list, list) {
-		list_del(&uevent->list);
-		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
-		    uevent->conn_req_ctx != ctx)
-			__destroy_id(uevent->conn_req_ctx);
+		ucma_destroy_private_ctx(uevent->conn_req_ctx);
 		kfree(uevent);
 	}
-
-	mutex_destroy(&ctx->mutex);
-	kfree(ctx);
 	return events_reported;
 }
 
-static int __destroy_id(struct ucma_context *ctx)
+/*
+ * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie
+ * the ctx is not public to the user). This either because:
+ *  - ucma_finish_ctx() hasn't been called
+ *  - xa_cmpxchg() succeed to remove the entry (only one thread can succeed)
+ */
+static int ucma_destroy_private_ctx(struct ucma_context *ctx)
 {
+	int events_reported;
+
 	/*
-	 * If the refcount is already 0 then ucma_close_id() has already
-	 * destroyed the cm_id, otherwise holding the refcount keeps cm_id
-	 * valid. Prevent queue_work() from being called.
+	 * Destroy the underlying cm_id. New work queuing is prevented now by
+	 * the removal from the xarray. Once the work is cancled ref will either
+	 * be 0 because the work ran to completion and consumed the ref from the
+	 * xarray, or it will be positive because we still have the ref from the
+	 * xarray. This can also be 0 in cases where cm_id was never set
 	 */
-	if (refcount_inc_not_zero(&ctx->ref)) {
-		rdma_lock_handler(ctx->cm_id);
-		ctx->destroying = 1;
-		rdma_unlock_handler(ctx->cm_id);
-		ucma_put_ctx(ctx);
-	}
-
 	cancel_work_sync(&ctx->close_work);
-	/* At this point it's guaranteed that there is no inflight closing task */
-	if (ctx->cm_id)
+	if (refcount_read(&ctx->ref))
 		ucma_close_id(&ctx->close_work);
-	return ucma_free_ctx(ctx);
+
+	events_reported = ucma_cleanup_ctx_events(ctx);
+	ucma_cleanup_multicast(ctx);
+
+	WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
+			   GFP_KERNEL) != NULL);
+	mutex_destroy(&ctx->mutex);
+	kfree(ctx);
+	return events_reported;
 }
 
 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
@@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 
 	xa_lock(&ctx_table);
 	ctx = _ucma_find_context(cmd.id, file);
-	if (!IS_ERR(ctx))
-		__xa_erase(&ctx_table, ctx->id);
+	if (!IS_ERR(ctx)) {
+		if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+				 GFP_KERNEL) != ctx)
+			ctx = ERR_PTR(-ENOENT);
+	}
 	xa_unlock(&ctx_table);
 
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	resp.events_reported = __destroy_id(ctx);
+	resp.events_reported = ucma_destroy_private_ctx(ctx);
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &resp, sizeof(resp)))
 		ret = -EFAULT;
@@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp)
 	 * prevented by this being a FD release function. The list_add_tail() in
 	 * ucma_connect_event_handler() can run concurrently, however it only
 	 * adds to the list *after* a listening ID. By only reading the first of
-	 * the list, and relying on __destroy_id() to block
+	 * the list, and relying on ucma_destroy_private_ctx() to block
 	 * ucma_connect_event_handler(), no additional locking is needed.
 	 */
 	while (!list_empty(&file->ctx_list)) {
 		struct ucma_context *ctx = list_first_entry(
 			&file->ctx_list, struct ucma_context, list);
 
-		xa_erase(&ctx_table, ctx->id);
-		__destroy_id(ctx);
+		WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+				   GFP_KERNEL) != ctx);
+		ucma_destroy_private_ctx(ctx);
 	}
 	kfree(file);
 	return 0;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 7ca4112e3e8f7ff001c154225743391be9f9fb8a..917338db7ac13e8d6582c5ba84f00b6b85b369b4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -135,7 +135,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 	 */
 	if (mask)
 		pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
-	return rounddown_pow_of_two(pgsz_bitmap);
+	return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
 }
 EXPORT_SYMBOL(ib_umem_find_best_pgsz);
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 3bae9ba0ead8518eba40570130412955d2c19f92..d26f3f3e0462a03e8cc38058124add5bcb0f86d8 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3956,7 +3956,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 
 	err = set_has_smi_cap(dev);
 	if (err)
-		return err;
+		goto err_mp;
 
 	if (!mlx5_core_mp_enabled(mdev)) {
 		for (i = 1; i <= dev->num_ports; i++) {
@@ -4319,7 +4319,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
 
 	err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
 	if (err)
-		mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+		mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 
 	return err;
 }
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index bc98bd950d99fadb0a24e2b9979be7fa1369b77e..3acb5c10b1553f03ab96a3336d9c0318e9edd966 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
 		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
 		       __func__, dev->id, pd->id);
 	}
-	kfree(uctx->cntxt_pd);
 	uctx->cntxt_pd = NULL;
 	_ocrdma_dealloc_pd(dev, pd);
+	kfree(pd);
 }
 
 static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 38a37770c01627cd3baaf9e9a270a491b2a5655e..3705c6b8b2237105997082ee0173e1130edae9fa 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 
 		}
 		usnic_uiom_free_dev_list(dev_list);
+		dev_list = NULL;
 	}
 
 	/* Try to find resources on an unused vf */
@@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 qp_grp_check:
 	if (IS_ERR_OR_NULL(qp_grp)) {
 		usnic_err("Failed to allocate qp_grp\n");
+		if (usnic_ib_share_vf)
+			usnic_uiom_free_dev_list(dev_list);
 		return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
 	}
 	return qp_grp;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 1b83d140742f397ddcbbf0054914187b0430a566..bcda17012aee8fa433cbc3f31896613e3e6e2606 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -325,7 +325,9 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 }
 
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
+	{ .compatible = "qcom,msm8998-smmu-v2" },
 	{ .compatible = "qcom,sc7180-smmu-500" },
+	{ .compatible = "qcom,sdm630-smmu-v2" },
 	{ .compatible = "qcom,sdm845-smmu-500" },
 	{ .compatible = "qcom,sm8150-smmu-500" },
 	{ .compatible = "qcom,sm8250-smmu-500" },
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 65cf06d70bf4ed900bb0a07ad36c63c6aeca406e..f665322a09919fbaea5dd3eae59c7ac87a7fe04d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -38,7 +38,6 @@
 #include <linux/dmi.h>
 #include <linux/pci-ats.h>
 #include <linux/memblock.h>
-#include <linux/dma-map-ops.h>
 #include <linux/dma-direct.h>
 #include <linux/crash_dump.h>
 #include <linux/numa.h>
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 790ef3497e7e3557a1fcf9cfad0dd7d67d73a382..18a9f05df4079b1b3ee2d19f8a748536cbec360e 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -118,8 +118,10 @@ void intel_svm_check(struct intel_iommu *iommu)
 	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
 }
 
-static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
-				unsigned long address, unsigned long pages, int ih)
+static void __flush_svm_range_dev(struct intel_svm *svm,
+				  struct intel_svm_dev *sdev,
+				  unsigned long address,
+				  unsigned long pages, int ih)
 {
 	struct qi_desc desc;
 
@@ -170,6 +172,22 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 	}
 }
 
+static void intel_flush_svm_range_dev(struct intel_svm *svm,
+				      struct intel_svm_dev *sdev,
+				      unsigned long address,
+				      unsigned long pages, int ih)
+{
+	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
+	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
+	unsigned long start = ALIGN_DOWN(address, align);
+	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
+
+	while (start < end) {
+		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
+		start += align;
+	}
+}
+
 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
 				unsigned long pages, int ih)
 {
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b7e2d96666142eb948924a8112ed02e2c3f779f1..9e44c09f6410890f43faaad328e7edea0f5da986 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -605,6 +605,7 @@ config DM_INTEGRITY
 	select BLK_DEV_INTEGRITY
 	select DM_BUFIO
 	select CRYPTO
+	select CRYPTO_SKCIPHER
 	select ASYNC_XOR
 	help
 	  This device-mapper target emulates a block device that has
@@ -622,6 +623,7 @@ config DM_ZONED
 	tristate "Drive-managed zoned block device target support"
 	depends on BLK_DEV_DM
 	depends on BLK_DEV_ZONED
+	select CRC32
 	help
 	  This device-mapper target takes a host-managed or host-aware zoned
 	  block device and exposes most of its capacity as a regular block
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 9c1a86bde658e48b67dcc67058d01de0c15d6e12..fce4cbf9529d6c85e5f7df6e99eb71db2c1df053 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
 }
 EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
 
+struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
+{
+	return c->dm_io;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
+
 sector_t dm_bufio_get_block_number(struct dm_buffer *b)
 {
 	return b->block;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 53791138d78bf4b5296f3aa400f517b60aabe9f4..8c874710f0bcfdca9c741018fca1ebd53f7ebfa2 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
 static void kcryptd_async_done(struct crypto_async_request *async_req,
 			       int error);
 
-static void crypt_alloc_req_skcipher(struct crypt_config *cc,
+static int crypt_alloc_req_skcipher(struct crypt_config *cc,
 				     struct convert_context *ctx)
 {
 	unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
 
-	if (!ctx->r.req)
-		ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO);
+	if (!ctx->r.req) {
+		ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+		if (!ctx->r.req)
+			return -ENOMEM;
+	}
 
 	skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);
 
@@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
 	skcipher_request_set_callback(ctx->r.req,
 	    CRYPTO_TFM_REQ_MAY_BACKLOG,
 	    kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));
+
+	return 0;
 }
 
-static void crypt_alloc_req_aead(struct crypt_config *cc,
+static int crypt_alloc_req_aead(struct crypt_config *cc,
 				 struct convert_context *ctx)
 {
-	if (!ctx->r.req_aead)
-		ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO);
+	if (!ctx->r.req) {
+		ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+		if (!ctx->r.req)
+			return -ENOMEM;
+	}
 
 	aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]);
 
@@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
 	aead_request_set_callback(ctx->r.req_aead,
 	    CRYPTO_TFM_REQ_MAY_BACKLOG,
 	    kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));
+
+	return 0;
 }
 
-static void crypt_alloc_req(struct crypt_config *cc,
+static int crypt_alloc_req(struct crypt_config *cc,
 			    struct convert_context *ctx)
 {
 	if (crypt_integrity_aead(cc))
-		crypt_alloc_req_aead(cc, ctx);
+		return crypt_alloc_req_aead(cc, ctx);
 	else
-		crypt_alloc_req_skcipher(cc, ctx);
+		return crypt_alloc_req_skcipher(cc, ctx);
 }
 
 static void crypt_free_req_skcipher(struct crypt_config *cc,
@@ -1529,17 +1539,28 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
 static blk_status_t crypt_convert(struct crypt_config *cc,
-			 struct convert_context *ctx, bool atomic)
+			 struct convert_context *ctx, bool atomic, bool reset_pending)
 {
 	unsigned int tag_offset = 0;
 	unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
 	int r;
 
-	atomic_set(&ctx->cc_pending, 1);
+	/*
+	 * if reset_pending is set we are dealing with the bio for the first time,
+	 * else we're continuing to work on the previous bio, so don't mess with
+	 * the cc_pending counter
+	 */
+	if (reset_pending)
+		atomic_set(&ctx->cc_pending, 1);
 
 	while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
 
-		crypt_alloc_req(cc, ctx);
+		r = crypt_alloc_req(cc, ctx);
+		if (r) {
+			complete(&ctx->restart);
+			return BLK_STS_DEV_RESOURCE;
+		}
+
 		atomic_inc(&ctx->cc_pending);
 
 		if (crypt_integrity_aead(cc))
@@ -1553,7 +1574,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 		 * but the driver request queue is full, let's wait.
 		 */
 		case -EBUSY:
-			wait_for_completion(&ctx->restart);
+			if (in_interrupt()) {
+				if (try_wait_for_completion(&ctx->restart)) {
+					/*
+					 * we don't have to block to wait for completion,
+					 * so proceed
+					 */
+				} else {
+					/*
+					 * we can't wait for completion without blocking
+					 * exit and continue processing in a workqueue
+					 */
+					ctx->r.req = NULL;
+					ctx->cc_sector += sector_step;
+					tag_offset++;
+					return BLK_STS_DEV_RESOURCE;
+				}
+			} else {
+				wait_for_completion(&ctx->restart);
+			}
 			reinit_completion(&ctx->restart);
 			fallthrough;
 		/*
@@ -1691,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 	atomic_inc(&io->io_pending);
 }
 
+static void kcryptd_io_bio_endio(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	bio_endio(io->base_bio);
+}
+
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
@@ -1713,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 		kfree(io->integrity_metadata);
 
 	base_bio->bi_status = error;
-	bio_endio(base_bio);
+
+	/*
+	 * If we are running this function from our tasklet,
+	 * we can't call bio_endio() here, because it will call
+	 * clone_endio() from dm.c, which in turn will
+	 * free the current struct dm_crypt_io structure with
+	 * our tasklet. In this case we need to delay bio_endio()
+	 * execution to after the tasklet is done and dequeued.
+	 */
+	if (tasklet_trylock(&io->tasklet)) {
+		tasklet_unlock(&io->tasklet);
+		bio_endio(base_bio);
+		return;
+	}
+
+	INIT_WORK(&io->work, kcryptd_io_bio_endio);
+	queue_work(cc->io_queue, &io->work);
 }
 
 /*
@@ -1945,6 +2006,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc,
 	}
 }
 
+static void kcryptd_crypt_write_continue(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	struct crypt_config *cc = io->cc;
+	struct convert_context *ctx = &io->ctx;
+	int crypt_finished;
+	sector_t sector = io->sector;
+	blk_status_t r;
+
+	wait_for_completion(&ctx->restart);
+	reinit_completion(&ctx->restart);
+
+	r = crypt_convert(cc, &io->ctx, true, false);
+	if (r)
+		io->error = r;
+	crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
+	if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) {
+		/* Wait for completion signaled by kcryptd_async_done() */
+		wait_for_completion(&ctx->restart);
+		crypt_finished = 1;
+	}
+
+	/* Encryption was already finished, submit io now */
+	if (crypt_finished) {
+		kcryptd_crypt_write_io_submit(io, 0);
+		io->sector = sector;
+	}
+
+	crypt_dec_pending(io);
+}
+
 static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
@@ -1973,7 +2065,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
 	crypt_inc_pending(io);
 	r = crypt_convert(cc, ctx,
-			  test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
+			  test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
+	/*
+	 * Crypto API backlogged the request, because its queue was full
+	 * and we're in softirq context, so continue from a workqueue
+	 * (TODO: is it actually possible to be in softirq in the write path?)
+	 */
+	if (r == BLK_STS_DEV_RESOURCE) {
+		INIT_WORK(&io->work, kcryptd_crypt_write_continue);
+		queue_work(cc->crypt_queue, &io->work);
+		return;
+	}
 	if (r)
 		io->error = r;
 	crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -1998,6 +2100,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
 	crypt_dec_pending(io);
 }
 
+static void kcryptd_crypt_read_continue(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	struct crypt_config *cc = io->cc;
+	blk_status_t r;
+
+	wait_for_completion(&io->ctx.restart);
+	reinit_completion(&io->ctx.restart);
+
+	r = crypt_convert(cc, &io->ctx, true, false);
+	if (r)
+		io->error = r;
+
+	if (atomic_dec_and_test(&io->ctx.cc_pending))
+		kcryptd_crypt_read_done(io);
+
+	crypt_dec_pending(io);
+}
+
 static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
@@ -2009,7 +2130,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 			   io->sector);
 
 	r = crypt_convert(cc, &io->ctx,
-			  test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
+			  test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+	/*
+	 * Crypto API backlogged the request, because its queue was full
+	 * and we're in softirq context, so continue from a workqueue
+	 */
+	if (r == BLK_STS_DEV_RESOURCE) {
+		INIT_WORK(&io->work, kcryptd_crypt_read_continue);
+		queue_work(cc->crypt_queue, &io->work);
+		return;
+	}
 	if (r)
 		io->error = r;
 
@@ -2091,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
 
 	if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
 	    (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
-		if (in_irq()) {
-			/* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
+		/*
+		 * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
+		 * irqs_disabled(): the kernel may run some IO completion from the idle thread, but
+		 * it is being executed with irqs disabled.
+		 */
+		if (in_irq() || irqs_disabled()) {
 			tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
 			tasklet_schedule(&io->tasklet);
 			return;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 5a7a1b90e671cfa09c64dc268547f31101f18cd9..81df019ab284a031218f66fcbbe881be82fef634 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1379,12 +1379,52 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
 #undef MAY_BE_HASH
 }
 
-static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
+struct flush_request {
+	struct dm_io_request io_req;
+	struct dm_io_region io_reg;
+	struct dm_integrity_c *ic;
+	struct completion comp;
+};
+
+static void flush_notify(unsigned long error, void *fr_)
+{
+	struct flush_request *fr = fr_;
+	if (unlikely(error != 0))
+		dm_integrity_io_error(fr->ic, "flusing disk cache", -EIO);
+	complete(&fr->comp);
+}
+
+static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
 {
 	int r;
+
+	struct flush_request fr;
+
+	if (!ic->meta_dev)
+		flush_data = false;
+	if (flush_data) {
+		fr.io_req.bi_op = REQ_OP_WRITE,
+		fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+		fr.io_req.mem.type = DM_IO_KMEM,
+		fr.io_req.mem.ptr.addr = NULL,
+		fr.io_req.notify.fn = flush_notify,
+		fr.io_req.notify.context = &fr;
+		fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
+		fr.io_reg.bdev = ic->dev->bdev,
+		fr.io_reg.sector = 0,
+		fr.io_reg.count = 0,
+		fr.ic = ic;
+		init_completion(&fr.comp);
+		r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
+		BUG_ON(r);
+	}
+
 	r = dm_bufio_write_dirty_buffers(ic->bufio);
 	if (unlikely(r))
 		dm_integrity_io_error(ic, "writing tags", r);
+
+	if (flush_data)
+		wait_for_completion(&fr.comp);
 }
 
 static void sleep_on_endio_wait(struct dm_integrity_c *ic)
@@ -2110,7 +2150,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
 
 	if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
 		integrity_metadata(&dio->work);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, false);
 
 		dio->in_flight = (atomic_t)ATOMIC_INIT(1);
 		dio->completion = NULL;
@@ -2195,7 +2235,7 @@ static void integrity_commit(struct work_struct *w)
 	flushes = bio_list_get(&ic->flush_bio_list);
 	if (unlikely(ic->mode != 'J')) {
 		spin_unlock_irq(&ic->endio_wait.lock);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
 		goto release_flush_bios;
 	}
 
@@ -2409,7 +2449,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
 	complete_journal_op(&comp);
 	wait_for_completion_io(&comp.comp);
 
-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, true);
 }
 
 static void integrity_writer(struct work_struct *w)
@@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm_integrity_c *ic)
 {
 	int r;
 
-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, false);
 	if (dm_integrity_failed(ic))
 		return;
 
@@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct work_struct *work)
 	unsigned long limit;
 	struct bio *bio;
 
-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, false);
 
 	range.logical_sector = 0;
 	range.n_sectors = ic->provided_data_sectors;
@@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work)
 	add_new_range_and_wait(ic, &range);
 	spin_unlock_irq(&ic->endio_wait.lock);
 
-	dm_integrity_flush_buffers(ic);
-	if (ic->meta_dev)
-		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+	dm_integrity_flush_buffers(ic, true);
 
 	limit = ic->provided_data_sectors;
 	if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
@@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
 		if (ic->meta_dev)
 			queue_work(ic->writer_wq, &ic->writer_work);
 		drain_workqueue(ic->writer_wq);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
 	}
 
 	if (ic->mode == 'B') {
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
 #if 1
 		/* set to 0 to test bitmap replay code */
 		init_journal(ic, 0, ic->journal_sections, 0);
@@ -3754,7 +3792,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	unsigned extra_args;
 	struct dm_arg_set as;
 	static const struct dm_arg _args[] = {
-		{0, 9, "Invalid number of feature args"},
+		{0, 15, "Invalid number of feature args"},
 	};
 	unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
 	bool should_write_sb;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 23c38777e8f63892880bc608509c147836a5407a..cab12b2251bac21c2717912d7228132e4860b821 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3729,10 +3729,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
 
 	/*
-	 * RAID1 and RAID10 personalities require bio splitting,
-	 * RAID0/4/5/6 don't and process large discard bios properly.
+	 * RAID0 and RAID10 personalities require bio splitting,
+	 * RAID1/4/5/6 don't and process large discard bios properly.
 	 */
-	if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+	if (rs_is_raid0(rs) || rs_is_raid10(rs)) {
 		limits->discard_granularity = chunk_size_bytes;
 		limits->max_discard_sectors = rs->md.chunk_sectors;
 	}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 4668b2cd98f4e246f217674839f2f6b82a834fef..11890db71f3fe30a6a01958ae1da28ceb5d76d79 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -141,6 +141,11 @@ struct dm_snapshot {
 	 * for them to be committed.
 	 */
 	struct bio_list bios_queued_during_merge;
+
+	/*
+	 * Flush data after merge.
+	 */
+	struct bio flush_bio;
 };
 
 /*
@@ -1121,6 +1126,17 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
 
 static void error_bios(struct bio *bio);
 
+static int flush_data(struct dm_snapshot *s)
+{
+	struct bio *flush_bio = &s->flush_bio;
+
+	bio_reset(flush_bio);
+	bio_set_dev(flush_bio, s->origin->bdev);
+	flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+	return submit_bio_wait(flush_bio);
+}
+
 static void merge_callback(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_snapshot *s = context;
@@ -1134,6 +1150,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
 		goto shut;
 	}
 
+	if (flush_data(s) < 0) {
+		DMERR("Flush after merge failed: shutting down merge");
+		goto shut;
+	}
+
 	if (s->store->type->commit_merge(s->store,
 					 s->num_merging_chunks) < 0) {
 		DMERR("Write error in exception store: shutting down merge");
@@ -1318,6 +1339,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->first_merging_chunk = 0;
 	s->num_merging_chunks = 0;
 	bio_list_init(&s->bios_queued_during_merge);
+	bio_init(&s->flush_bio, NULL, 0);
 
 	/* Allocate hash table for COW data */
 	if (init_hash_tables(s)) {
@@ -1504,6 +1526,8 @@ static void snapshot_dtr(struct dm_target *ti)
 
 	dm_exception_store_destroy(s->store);
 
+	bio_uninit(&s->flush_bio);
+
 	dm_put_device(ti, s->cow);
 
 	dm_put_device(ti, s->origin);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b3c3c8b4cb428f587dbe9840b276fdc14d8fd630..7bac564f3faa6e1b49899b72e1d5286efcff840b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -562,7 +562,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 		 * subset of the parent bdev; require extra privileges.
 		 */
 		if (!capable(CAP_SYS_RAWIO)) {
-			DMWARN_LIMIT(
+			DMDEBUG_LIMIT(
 	"%s: sending ioctl %x to DM device without required privilege.",
 				current->comm, cmd);
 			r = -ENOIOCTLCMD;
diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c
index f580f0ac64975150830db2c5ef67cabae67613e9..01e4a194f187e272c5374775b1577f4154d441b0 100644
--- a/drivers/net/can/dev/dev.c
+++ b/drivers/net/can/dev/dev.c
@@ -153,11 +153,11 @@ static void can_restart(struct net_device *dev)
 
 	cf->can_id |= CAN_ERR_RESTARTED;
 
-	netif_rx_ni(skb);
-
 	stats->rx_packets++;
 	stats->rx_bytes += cf->len;
 
+	netif_rx_ni(skb);
+
 restart:
 	netdev_dbg(dev, "restarted\n");
 	priv->can_stats.restarts++;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 61631f4fd92a1d2dea864873431d60beca8b4318..f347ecc79aef2d5395ecc44167fbdb999eb4edef 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -514,11 +514,11 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if,
 	else
 		memcpy(cfd->data, rm->d, cfd->len);
 
-	peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low));
-
 	netdev->stats.rx_packets++;
 	netdev->stats.rx_bytes += cfd->len;
 
+	peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low));
+
 	return 0;
 }
 
@@ -580,11 +580,11 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if,
 	if (!skb)
 		return -ENOMEM;
 
-	peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low));
-
 	netdev->stats.rx_packets++;
 	netdev->stats.rx_bytes += cf->len;
 
+	peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low));
+
 	return 0;
 }
 
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index fa47bab510bb90cb35bcc56fe9d0275dccc57cf4..f9a524c5f6d62710722b920b615154670dfc8992 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -39,6 +39,7 @@ static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_device *peer;
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 	struct net_device_stats *peerstats, *srcstats = &dev->stats;
+	u8 len;
 
 	if (can_dropped_invalid_skb(dev, skb))
 		return NETDEV_TX_OK;
@@ -61,12 +62,13 @@ static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->dev        = peer;
 	skb->ip_summed  = CHECKSUM_UNNECESSARY;
 
+	len = cfd->len;
 	if (netif_rx_ni(skb) == NET_RX_SUCCESS) {
 		srcstats->tx_packets++;
-		srcstats->tx_bytes += cfd->len;
+		srcstats->tx_bytes += len;
 		peerstats = &peer->stats;
 		peerstats->rx_packets++;
-		peerstats->rx_bytes += cfd->len;
+		peerstats->rx_bytes += len;
 	}
 
 out_unlock:
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index a238ded354c22522bd10ed0b833cf7e6d7e15016..23fc7225c8d1f695c78be2917c7f2a1b006583d7 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1400,7 +1400,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port,
 	    !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED))
 		return -EINVAL;
 
-	if (vlan->vid > dev->num_vlans)
+	if (vlan->vid >= dev->num_vlans)
 		return -ERANGE;
 
 	b53_enable_vlan(dev, true, ds->vlan_filtering);
diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index 66ddf67b87371d0a9c12667b700a6996710023f9..7b96396be609e7bb1331d63c6fd11a403922e0b7 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -351,6 +351,10 @@ int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
 		if (err)
 			return err;
 
+		err = mv88e6185_g1_stu_data_read(chip, entry);
+		if (err)
+			return err;
+
 		/* VTU DBNum[3:0] are located in VTU Operation 3:0
 		 * VTU DBNum[5:4] are located in VTU Operation 9:8
 		 */
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index a8b20441ca7cff870cdcb5ff36295d4e9dc372b7..777bbf6d2586d5c0f13b356dcd8758fa4731c09d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2495,8 +2495,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	priv = netdev_priv(dev);
 
 	priv->clk = devm_clk_get_optional(&pdev->dev, "sw_sysport");
-	if (IS_ERR(priv->clk))
-		return PTR_ERR(priv->clk);
+	if (IS_ERR(priv->clk)) {
+		ret = PTR_ERR(priv->clk);
+		goto err_free_netdev;
+	}
 
 	/* Allocate number of TX rings */
 	priv->tx_rings = devm_kcalloc(&pdev->dev, txq,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index d298b935717784e5ba3bdf3344a0898e624a5759..6c6b411e78fd87c66da58761a2f55cb765189a8b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -469,6 +469,9 @@ int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu,
 	int pf = rvu_get_pf(req->hdr.pcifunc);
 	u8 cgx_id, lmac_id;
 
+	if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
+		return -EPERM;
+
 	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
 
 	cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr);
@@ -485,6 +488,9 @@ int rvu_mbox_handler_cgx_mac_addr_get(struct rvu *rvu,
 	int rc = 0, i;
 	u64 cfg;
 
+	if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
+		return -EPERM;
+
 	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
 
 	rsp->hdr.rc = rc;
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index a560d6be2a445720baeb17efe63af9bda77bab2c..5b2c0cea49eaf6c856f798870abc61c3ea0a047a 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -60,14 +60,27 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port,
 		      const unsigned char mac[ETH_ALEN],
 		      unsigned int vid, enum macaccess_entry_type type)
 {
+	u32 cmd = ANA_TABLES_MACACCESS_VALID |
+		ANA_TABLES_MACACCESS_DEST_IDX(port) |
+		ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
+		ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN);
+	unsigned int mc_ports;
+
+	/* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */
+	if (type == ENTRYTYPE_MACv4)
+		mc_ports = (mac[1] << 8) | mac[2];
+	else if (type == ENTRYTYPE_MACv6)
+		mc_ports = (mac[0] << 8) | mac[1];
+	else
+		mc_ports = 0;
+
+	if (mc_ports & BIT(ocelot->num_phys_ports))
+		cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY;
+
 	ocelot_mact_select(ocelot, mac, vid);
 
 	/* Issue a write command */
-	ocelot_write(ocelot, ANA_TABLES_MACACCESS_VALID |
-			     ANA_TABLES_MACACCESS_DEST_IDX(port) |
-			     ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
-			     ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN),
-			     ANA_TABLES_MACACCESS);
+	ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS);
 
 	return ocelot_mact_wait_for_completion(ocelot);
 }
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 4585c35c24e52926e26777087ba1ed75cd7530d7..9553eb3e441c91b9cc66e4b55c76bdc61239809b 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -1153,10 +1153,8 @@ static int ocelot_netdevice_event(struct notifier_block *unused,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	int ret = 0;
 
-	if (!ocelot_netdevice_dev_check(dev))
-		return 0;
-
 	if (event == NETDEV_PRECHANGEUPPER &&
+	    ocelot_netdevice_dev_check(dev) &&
 	    netif_is_lag_master(info->upper_dev)) {
 		struct netdev_lag_upper_info *lag_upper_info = info->upper_info;
 		struct netlink_ext_ack *extack;
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index c633046329352601ce76ad7d12913fffd420e9ec..590b088bc4c7f3e2f0d6d79443fcff6a6f4da077 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2606,10 +2606,10 @@ static int sh_eth_close(struct net_device *ndev)
 	/* Free all the skbuffs in the Rx queue and the DMA buffer. */
 	sh_eth_ring_free(ndev);
 
-	pm_runtime_put_sync(&mdp->pdev->dev);
-
 	mdp->is_opened = 0;
 
+	pm_runtime_put(&mdp->pdev->dev);
+
 	return 0;
 }
 
@@ -3034,6 +3034,28 @@ static int sh_mdio_release(struct sh_eth_private *mdp)
 	return 0;
 }
 
+static int sh_mdiobb_read(struct mii_bus *bus, int phy, int reg)
+{
+	int res;
+
+	pm_runtime_get_sync(bus->parent);
+	res = mdiobb_read(bus, phy, reg);
+	pm_runtime_put(bus->parent);
+
+	return res;
+}
+
+static int sh_mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
+{
+	int res;
+
+	pm_runtime_get_sync(bus->parent);
+	res = mdiobb_write(bus, phy, reg, val);
+	pm_runtime_put(bus->parent);
+
+	return res;
+}
+
 /* MDIO bus init function */
 static int sh_mdio_init(struct sh_eth_private *mdp,
 			struct sh_eth_plat_data *pd)
@@ -3058,6 +3080,10 @@ static int sh_mdio_init(struct sh_eth_private *mdp,
 	if (!mdp->mii_bus)
 		return -ENOMEM;
 
+	/* Wrap accessors with Runtime PM-aware ops */
+	mdp->mii_bus->read = sh_mdiobb_read;
+	mdp->mii_bus->write = sh_mdiobb_write;
+
 	/* Hook up MII support for ethtool */
 	mdp->mii_bus->name = "sh_mii";
 	mdp->mii_bus->parent = dev;
diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c
index 5136275c8e7399fbbd74d048abb89f50929e5af3..d3915f83185430e9db7e6cfb3903af50ba244d7a 100644
--- a/drivers/net/mdio/mdio-bitbang.c
+++ b/drivers/net/mdio/mdio-bitbang.c
@@ -149,7 +149,7 @@ static int mdiobb_cmd_addr(struct mdiobb_ctrl *ctrl, int phy, u32 addr)
 	return dev_addr;
 }
 
-static int mdiobb_read(struct mii_bus *bus, int phy, int reg)
+int mdiobb_read(struct mii_bus *bus, int phy, int reg)
 {
 	struct mdiobb_ctrl *ctrl = bus->priv;
 	int ret, i;
@@ -180,8 +180,9 @@ static int mdiobb_read(struct mii_bus *bus, int phy, int reg)
 	mdiobb_get_bit(ctrl);
 	return ret;
 }
+EXPORT_SYMBOL(mdiobb_read);
 
-static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
+int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
 {
 	struct mdiobb_ctrl *ctrl = bus->priv;
 
@@ -201,6 +202,7 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
 	mdiobb_get_bit(ctrl);
 	return 0;
 }
+EXPORT_SYMBOL(mdiobb_write);
 
 struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 {
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 5a78848db93fdaba8a6888fad3cd2b13ac03cbd6..291e76d32abe7c4ee928f8af1e669d9720495667 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1827,6 +1827,15 @@ cdc_ncm_speed_change(struct usbnet *dev,
 	uint32_t rx_speed = le32_to_cpu(data->DLBitRRate);
 	uint32_t tx_speed = le32_to_cpu(data->ULBitRate);
 
+	/* if the speed hasn't changed, don't report it.
+	 * RTL8156 shipped before 2021 sends notification about every 32ms.
+	 */
+	if (dev->rx_speed == rx_speed && dev->tx_speed == tx_speed)
+		return;
+
+	dev->rx_speed = rx_speed;
+	dev->tx_speed = tx_speed;
+
 	/*
 	 * Currently the USB-NET API does not support reporting the actual
 	 * device speed. Do print it instead.
@@ -1867,7 +1876,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		 * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be
 		 * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE.
 		 */
-		usbnet_link_change(dev, !!event->wValue, 0);
+		if (netif_carrier_ok(dev->net) != !!event->wValue)
+			usbnet_link_change(dev, !!event->wValue, 0);
 		break;
 
 	case USB_CDC_NOTIFY_SPEED_CHANGE:
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f320273fc6727fec7b9f944c8c1ea895d12e51e0..200bdd672c281ffc54d7d5bc19be654ca2302d48 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2856,6 +2856,11 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
 	NULL,
 };
 
+static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl)
+{
+	return ctrl->opts && ctrl->opts->discovery_nqn;
+}
+
 static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
 		struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 {
@@ -2875,7 +2880,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
 		}
 
 		if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
-		    (ctrl->opts && ctrl->opts->discovery_nqn))
+		    nvme_discovery_ctrl(ctrl))
 			continue;
 
 		dev_err(ctrl->device,
@@ -3144,7 +3149,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 			goto out_free;
 		}
 
-		if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
+		if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) {
 			dev_err(ctrl->device,
 				"keep-alive support is mandatory for fabrics\n");
 			ret = -EINVAL;
@@ -3184,7 +3189,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	if (ret < 0)
 		return ret;
 
-	if (!ctrl->identified) {
+	if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
 		ret = nvme_hwmon_init(ctrl);
 		if (ret < 0)
 			return ret;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 979ee31b8dd1c9c961ab27364d94ea972598e4b6..216619926563ed2c40373ba6981da58d5481c235 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -201,7 +201,7 @@ static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
 
 static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
 {
-	return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset,
+	return min_t(size_t, iov_iter_single_seg_count(&req->iter),
 			req->pdu_len - req->pdu_sent);
 }
 
@@ -286,7 +286,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	 * directly, otherwise queue io_work. Also, only do that if we
 	 * are on the same cpu, so we don't introduce contention.
 	 */
-	if (queue->io_cpu == smp_processor_id() &&
+	if (queue->io_cpu == __smp_processor_id() &&
 	    sync && empty && mutex_trylock(&queue->send_mutex)) {
 		queue->more_requests = !last;
 		nvme_tcp_send_all(queue);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index bdfc22eb2a10fa95847411b047a4e0ea9a2f676a..06b6b742bb213e3ba924ef113d81e4081379b2be 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1220,6 +1220,14 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 	}
 	ndev->inline_data_size = nport->inline_data_size;
 	ndev->inline_page_count = inline_page_count;
+
+	if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags &
+				  IB_DEVICE_INTEGRITY_HANDOVER)) {
+		pr_warn("T10-PI is not supported by device %s. Disabling it\n",
+			cm_id->device->name);
+		nport->pi_enable = false;
+	}
+
 	ndev->device = cm_id->device;
 	kref_init(&ndev->ref);
 
@@ -1855,14 +1863,6 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
 		goto out_destroy_id;
 	}
 
-	if (port->nport->pi_enable &&
-	    !(cm_id->device->attrs.device_cap_flags &
-	      IB_DEVICE_INTEGRITY_HANDOVER)) {
-		pr_err("T10-PI is not supported for %pISpcs\n", addr);
-		ret = -EINVAL;
-		goto out_destroy_id;
-	}
-
 	port->cm_id = cm_id;
 	return 0;
 
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 794a37d5085376c54b571274f360f9c427d5c5d9..cb2f55f450e4adf05a7b3d81e8b9a01230665584 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -726,11 +726,6 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
 	return per_cpu(hw_events->irq, cpu);
 }
 
-bool arm_pmu_irq_is_nmi(void)
-{
-	return has_nmi;
-}
-
 /*
  * PMU hardware loses all context when a CPU goes offline.
  * When a CPU is hotplugged back in, since some hardware registers are
diff --git a/drivers/scsi/mpt3sas/Kconfig b/drivers/scsi/mpt3sas/Kconfig
index 86209455172d69e440b8794b55342d18d065b81e..c299f7e078fb97c79a13d57c1f7918c4fd84e2a5 100644
--- a/drivers/scsi/mpt3sas/Kconfig
+++ b/drivers/scsi/mpt3sas/Kconfig
@@ -79,5 +79,5 @@ config SCSI_MPT2SAS
 	select SCSI_MPT3SAS
 	depends on PCI && SCSI
 	help
-	Dummy config option for backwards compatiblity: configure the MPT3SAS
+	Dummy config option for backwards compatibility: configure the MPT3SAS
 	driver instead.
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index f5fc7f518f8afea5e05504400450422b0905bc42..47ad64b06623696ca510da02ffafa0d0149256ce 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -2245,7 +2245,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
 			     chap_name);
 		break;
 	case ISCSI_BOOT_TGT_CHAP_SECRET:
-		rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+		rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN,
 			     chap_secret);
 		break;
 	case ISCSI_BOOT_TGT_REV_CHAP_NAME:
@@ -2253,7 +2253,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
 			     mchap_name);
 		break;
 	case ISCSI_BOOT_TGT_REV_CHAP_SECRET:
-		rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN,
+		rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN,
 			     mchap_secret);
 		break;
 	case ISCSI_BOOT_TGT_FLAGS:
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 24c0f7ec03511b31791ad399f43e10e88b20fb6a..4a08c450b756f3399116e4dc61ae67e4b8c20d53 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -6740,7 +6740,7 @@ static int __init scsi_debug_init(void)
 		k = sdeb_zbc_model_str(sdeb_zbc_model_s);
 		if (k < 0) {
 			ret = k;
-			goto free_vm;
+			goto free_q_arr;
 		}
 		sdeb_zbc_model = k;
 		switch (sdeb_zbc_model) {
@@ -6753,7 +6753,8 @@ static int __init scsi_debug_init(void)
 			break;
 		default:
 			pr_err("Invalid ZBC model\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto free_q_arr;
 		}
 	}
 	if (sdeb_zbc_model != BLK_ZONED_NONE) {
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 679c2c02504763ba31a21c425d58bdfca02baba8..a3d2d4bc4a3dcda79a3b1368bc397eccc6a90549 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -984,8 +984,10 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
 		}
 	}
 
-	if (sdp->no_write_same)
+	if (sdp->no_write_same) {
+		rq->rq_flags |= RQF_QUIET;
 		return BLK_STS_TARGET;
+	}
 
 	if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff)
 		return sd_setup_write_same16_cmnd(cmd, false);
@@ -3510,10 +3512,8 @@ static int sd_probe(struct device *dev)
 static int sd_remove(struct device *dev)
 {
 	struct scsi_disk *sdkp;
-	dev_t devt;
 
 	sdkp = dev_get_drvdata(dev);
-	devt = disk_devt(sdkp->disk);
 	scsi_autopm_get_device(sdkp->device);
 
 	async_synchronize_full_domain(&scsi_sd_pm_domain);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 82ad31781bc9e127d9fc6834f3b704e69f1d179c..e31d2c5c7b23b7d152322723fdc449eb5eb5400d 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -289,7 +289,8 @@ static inline void ufshcd_wb_config(struct ufs_hba *hba)
 	if (ret)
 		dev_err(hba->dev, "%s: En WB flush during H8: failed: %d\n",
 			__func__, ret);
-	ufshcd_wb_toggle_flush(hba, true);
+	if (!(hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL))
+		ufshcd_wb_toggle_flush(hba, true);
 }
 
 static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba)
@@ -5436,9 +5437,6 @@ static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set)
 
 static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable)
 {
-	if (hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL)
-		return;
-
 	if (enable)
 		ufshcd_wb_buf_flush_enable(hba);
 	else
@@ -6661,19 +6659,16 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
-	unsigned int tag;
 	u32 pos;
 	int err;
-	u8 resp = 0xF;
-	struct ufshcd_lrb *lrbp;
+	u8 resp = 0xF, lun;
 	unsigned long flags;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
-	tag = cmd->request->tag;
 
-	lrbp = &hba->lrb[tag];
-	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp);
+	lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun);
+	err = ufshcd_issue_tm_cmd(hba, lun, 0, UFS_LOGICAL_RESET, &resp);
 	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
 		if (!err)
 			err = resp;
@@ -6682,7 +6677,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 
 	/* clear the commands that were pending for corresponding LUN */
 	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
-		if (hba->lrb[pos].lun == lrbp->lun) {
+		if (hba->lrb[pos].lun == lun) {
 			err = ufshcd_clear_cmd(hba, pos);
 			if (err)
 				break;
@@ -8698,6 +8693,8 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 			ufshcd_wb_need_flush(hba));
 	}
 
+	flush_work(&hba->eeh_work);
+
 	if (req_dev_pwr_mode != hba->curr_dev_pwr_mode) {
 		if (!ufshcd_is_runtime_pm(pm_op))
 			/* ensure that bkops is disabled */
@@ -8710,8 +8707,6 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 		}
 	}
 
-	flush_work(&hba->eeh_work);
-
 	/*
 	 * In the case of DeepSleep, the device is expected to remain powered
 	 * with the link off, so do not check for bkops.
@@ -8938,7 +8933,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba)
 	if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) ==
 	     hba->curr_dev_pwr_mode) &&
 	    (ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) ==
-	     hba->uic_link_state))
+	     hba->uic_link_state) &&
+	     !hba->dev_info.b_rpm_dev_flush_capable)
 		goto out;
 
 	if (pm_runtime_suspended(hba->dev)) {
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 70467b9d61baa3d13e3851e3abd25a36b2f5919e..a3afd1b9ac567bcc48c809edef5d0509ae5dd0aa 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -115,6 +115,7 @@ struct cdns_spi {
 	void __iomem *regs;
 	struct clk *ref_clk;
 	struct clk *pclk;
+	unsigned int clk_rate;
 	u32 speed_hz;
 	const u8 *txbuf;
 	u8 *rxbuf;
@@ -250,7 +251,7 @@ static void cdns_spi_config_clock_freq(struct spi_device *spi,
 	u32 ctrl_reg, baud_rate_val;
 	unsigned long frequency;
 
-	frequency = clk_get_rate(xspi->ref_clk);
+	frequency = xspi->clk_rate;
 
 	ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR);
 
@@ -558,8 +559,9 @@ static int cdns_spi_probe(struct platform_device *pdev)
 	master->auto_runtime_pm = true;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 
+	xspi->clk_rate = clk_get_rate(xspi->ref_clk);
 	/* Set to default valid value */
-	master->max_speed_hz = clk_get_rate(xspi->ref_clk) / 4;
+	master->max_speed_hz = xspi->clk_rate / 4;
 	xspi->speed_hz = master->max_speed_hz;
 
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 9494257e1c33f56173dfefb7ef7c2b49100adc01..6d8e0a05a53554b4393752a6a6cad93b6276d548 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -115,14 +115,13 @@ static void fsl_spi_chipselect(struct spi_device *spi, int value)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
 	struct fsl_spi_platform_data *pdata;
-	bool pol = spi->mode & SPI_CS_HIGH;
 	struct spi_mpc8xxx_cs	*cs = spi->controller_state;
 
 	pdata = spi->dev.parent->parent->platform_data;
 
 	if (value == BITBANG_CS_INACTIVE) {
 		if (pdata->cs_control)
-			pdata->cs_control(spi, !pol);
+			pdata->cs_control(spi, false);
 	}
 
 	if (value == BITBANG_CS_ACTIVE) {
@@ -134,7 +133,7 @@ static void fsl_spi_chipselect(struct spi_device *spi, int value)
 		fsl_spi_change_mode(spi);
 
 		if (pdata->cs_control)
-			pdata->cs_control(spi, pol);
+			pdata->cs_control(spi, true);
 	}
 }
 
diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c
index 1066eebe3b28b046deb69348b1357c77eed90185..328d5a78792fe78c19400fb78373fcee440c3a99 100644
--- a/drivers/tty/serial/sifive.c
+++ b/drivers/tty/serial/sifive.c
@@ -1000,6 +1000,7 @@ static int sifive_serial_probe(struct platform_device *pdev)
 	/* Set up clock divider */
 	ssp->clkin_rate = clk_get_rate(ssp->clk);
 	ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE;
+	ssp->port.uartclk = ssp->baud_rate * 16;
 	__ssp_update_div(ssp);
 
 	platform_set_drvdata(pdev, ssp);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index a8030332a19169b4805c8537b2a0df359a491833..e850f79351cbb4690fe757293935daeab179c5bf 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -2060,16 +2060,6 @@ static struct irq_chip xen_percpu_chip __read_mostly = {
 	.irq_ack		= ack_dynirq,
 };
 
-int xen_set_callback_via(uint64_t via)
-{
-	struct xen_hvm_param a;
-	a.domid = DOMID_SELF;
-	a.index = HVM_PARAM_CALLBACK_IRQ;
-	a.value = via;
-	return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
-}
-EXPORT_SYMBOL_GPL(xen_set_callback_via);
-
 #ifdef CONFIG_XEN_PVHVM
 /* Vector callbacks are better than PCI interrupts to receive event
  * channel notifications because we can receive vector callbacks on any
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index dd911e1ff782cc4d54556d1d034fa9ed15c0b7bd..18f0ed8b1f93b8fab25824162f86bac9beb2d781 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -132,6 +132,13 @@ static int platform_pci_probe(struct pci_dev *pdev,
 			dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
 			goto out;
 		}
+		/*
+		 * It doesn't strictly *have* to run on CPU0 but it sure
+		 * as hell better process the event channel ports delivered
+		 * to CPU0.
+		 */
+		irq_set_affinity(pdev->irq, cpumask_of(0));
+
 		callback_via = get_callback_via(pdev);
 		ret = xen_set_callback_via(callback_via);
 		if (ret) {
@@ -149,7 +156,6 @@ static int platform_pci_probe(struct pci_dev *pdev,
 	ret = gnttab_init();
 	if (ret)
 		goto grant_out;
-	xenbus_probe(NULL);
 	return 0;
 grant_out:
 	gnttab_free_auto_xlat_frames();
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index b0c73c58f9874a34f81fcd4477cc752cdfdbf381..720a7b7abd46d690f2ed86f544064403ab4f1647 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -717,14 +717,15 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
 	return 0;
 }
 
-static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
+static long privcmd_ioctl_mmap_resource(struct file *file,
+				struct privcmd_mmap_resource __user *udata)
 {
 	struct privcmd_data *data = file->private_data;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct privcmd_mmap_resource kdata;
 	xen_pfn_t *pfns = NULL;
-	struct xen_mem_acquire_resource xdata;
+	struct xen_mem_acquire_resource xdata = { };
 	int rc;
 
 	if (copy_from_user(&kdata, udata, sizeof(kdata)))
@@ -734,6 +735,22 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
 	if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
 		return -EPERM;
 
+	/* Both fields must be set or unset */
+	if (!!kdata.addr != !!kdata.num)
+		return -EINVAL;
+
+	xdata.domid = kdata.dom;
+	xdata.type = kdata.type;
+	xdata.id = kdata.id;
+
+	if (!kdata.addr && !kdata.num) {
+		/* Query the size of the resource. */
+		rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata);
+		if (rc)
+			return rc;
+		return __put_user(xdata.nr_frames, &udata->num);
+	}
+
 	mmap_write_lock(mm);
 
 	vma = find_vma(mm, kdata.addr);
@@ -768,10 +785,6 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
 	} else
 		vma->vm_private_data = PRIV_VMA_LOCKED;
 
-	memset(&xdata, 0, sizeof(xdata));
-	xdata.domid = kdata.dom;
-	xdata.type = kdata.type;
-	xdata.id = kdata.id;
 	xdata.frame = kdata.idx;
 	xdata.nr_frames = kdata.num;
 	set_xen_guest_handle(xdata.frame_list, pfns);
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index 2a93b7c9c159929557f845c549b7f7676efd3a11..dc153733541441ed624066829d88b407effe429a 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -115,6 +115,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
 		      const char *type,
 		      const char *nodename);
 int xenbus_probe_devices(struct xen_bus_type *bus);
+void xenbus_probe(void);
 
 void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
 
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index eb5151fc8efab02899ce764b99a8d6f2955e9ca7..e5fda0256feb3d476aeee5f2d1e28ac7bd29f226 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -57,16 +57,8 @@ DEFINE_MUTEX(xs_response_mutex);
 static int xenbus_irq;
 static struct task_struct *xenbus_task;
 
-static DECLARE_WORK(probe_work, xenbus_probe);
-
-
 static irqreturn_t wake_waiting(int irq, void *unused)
 {
-	if (unlikely(xenstored_ready == 0)) {
-		xenstored_ready = 1;
-		schedule_work(&probe_work);
-	}
-
 	wake_up(&xb_waitq);
 	return IRQ_HANDLED;
 }
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 44634d970a5ca1f3bb4b0a1278e245121cd69135..c8f0282bb64975b34f44459c0565156293b0ea64 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -683,29 +683,76 @@ void unregister_xenstore_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
 
-void xenbus_probe(struct work_struct *unused)
+void xenbus_probe(void)
 {
 	xenstored_ready = 1;
 
+	/*
+	 * In the HVM case, xenbus_init() deferred its call to
+	 * xs_init() in case callbacks were not operational yet.
+	 * So do it now.
+	 */
+	if (xen_store_domain_type == XS_HVM)
+		xs_init();
+
 	/* Notify others that xenstore is up */
 	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
 }
-EXPORT_SYMBOL_GPL(xenbus_probe);
 
-static int __init xenbus_probe_initcall(void)
+/*
+ * Returns true when XenStore init must be deferred in order to
+ * allow the PCI platform device to be initialised, before we
+ * can actually have event channel interrupts working.
+ */
+static bool xs_hvm_defer_init_for_callback(void)
 {
-	if (!xen_domain())
-		return -ENODEV;
+#ifdef CONFIG_XEN_PVHVM
+	return xen_store_domain_type == XS_HVM &&
+		!xen_have_vector_callback;
+#else
+	return false;
+#endif
+}
 
-	if (xen_initial_domain() || xen_hvm_domain())
-		return 0;
+static int __init xenbus_probe_initcall(void)
+{
+	/*
+	 * Probe XenBus here in the XS_PV case, and also XS_HVM unless we
+	 * need to wait for the platform PCI device to come up.
+	 */
+	if (xen_store_domain_type == XS_PV ||
+	    (xen_store_domain_type == XS_HVM &&
+	     !xs_hvm_defer_init_for_callback()))
+		xenbus_probe();
 
-	xenbus_probe(NULL);
 	return 0;
 }
-
 device_initcall(xenbus_probe_initcall);
 
+int xen_set_callback_via(uint64_t via)
+{
+	struct xen_hvm_param a;
+	int ret;
+
+	a.domid = DOMID_SELF;
+	a.index = HVM_PARAM_CALLBACK_IRQ;
+	a.value = via;
+
+	ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a);
+	if (ret)
+		return ret;
+
+	/*
+	 * If xenbus_probe_initcall() deferred the xenbus_probe()
+	 * due to the callback not functioning yet, we can do it now.
+	 */
+	if (!xenstored_ready && xs_hvm_defer_init_for_callback())
+		xenbus_probe();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xen_set_callback_via);
+
 /* Set up event channel for xenstored which is run as a local process
  * (this is normally used only in dom0)
  */
@@ -818,11 +865,17 @@ static int __init xenbus_init(void)
 		break;
 	}
 
-	/* Initialize the interface to xenstore. */
-	err = xs_init();
-	if (err) {
-		pr_warn("Error initializing xenstore comms: %i\n", err);
-		goto out_error;
+	/*
+	 * HVM domains may not have a functional callback yet. In that
+	 * case let xs_init() be called from xenbus_probe(), which will
+	 * get invoked at an appropriate time.
+	 */
+	if (xen_store_domain_type != XS_HVM) {
+		err = xs_init();
+		if (err) {
+			pr_warn("Error initializing xenstore comms: %i\n", err);
+			goto out_error;
+		}
 	}
 
 	if ((xen_store_domain_type != XS_LOCAL) &&
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 8bda092e60c5a00118ec8d5159f8a30bdcd38a04..e027c718ca01adea87dbd12bd5fa7b2945fb9c0c 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -413,7 +413,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 
 	inode = d_backing_inode(object->backer);
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
@@ -713,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	inode = d_backing_inode(object->backer);
 	ASSERT(S_ISREG(inode->i_mode));
-	ASSERT(inode->i_mapping->a_ops->readpages);
 
 	/* calculate the shift required to use bmap */
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b9df85506938d55fa86ac2ceaf19ad61ad188b43..5d39129406ea66668a62a4d9d990bb89f8ad3d49 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3740,7 +3740,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 
 	if (!ses->binding) {
 		ses->capabilities = server->capabilities;
-		if (linuxExtEnabled == 0)
+		if (!linuxExtEnabled)
 			ses->capabilities &= (~server->vals->cap_unix);
 
 		if (ses->auth_key.response) {
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 6ad6ba5f6ebeef20bcf68f2d3ae27e432dcd4d04..0fdb0de7ff861b89e90ebc43b40921f79411eef2 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1260,7 +1260,8 @@ void dfs_cache_del_vol(const char *fullpath)
 	vi = find_vol(fullpath);
 	spin_unlock(&vol_list_lock);
 
-	kref_put(&vi->refcnt, vol_release);
+	if (!IS_ERR(vi))
+		kref_put(&vi->refcnt, vol_release);
 }
 
 /**
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 0afccbbed2e65363ef029eb6c5caf3af2cb77a70..076bcadc756a751ffaa43b55a107404b33b203fa 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -303,8 +303,6 @@ do {									\
 int
 smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx)
 {
-	int rc = 0;
-
 	memcpy(new_ctx, ctx, sizeof(*ctx));
 	new_ctx->prepath = NULL;
 	new_ctx->mount_options = NULL;
@@ -327,7 +325,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
 	DUP_CTX_STR(nodename);
 	DUP_CTX_STR(iocharset);
 
-	return rc;
+	return 0;
 }
 
 static int
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 067eb44c7baa863c1e7ccd2c2f599be0b067f320..794fc3b68b4f96905eef5c0d6e27b03656753c1b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3248,7 +3248,7 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 	free_rsp_buf(resp_buftype, rsp);
 
 	/* retry close in a worker thread if this one is interrupted */
-	if (rc == -EINTR) {
+	if (is_interrupt_error(rc)) {
 		int tmp_rc;
 
 		tmp_rc = smb2_handle_cancelled_close(tcon, persistent_fid,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 204a622b89ed3575d3e5cda23e115341615afb7a..d85edf5d1429418728d9fa44fac2a2d9a6df7684 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -424,7 +424,7 @@ struct smb2_rdma_transform_capabilities_context {
 	__le16	TransformCount;
 	__u16	Reserved1;
 	__u32	Reserved2;
-	__le16	RDMATransformIds[1];
+	__le16	RDMATransformIds[];
 } __packed;
 
 /* Signing algorithms */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 1a0a827a7f34578984f6185c9d235d4248bae8a7..be799040a4154ab075a7aeb104309730ce8ddc8e 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -372,20 +372,3 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 	}
 	return err;
 }
-
-int __ext4_handle_dirty_super(const char *where, unsigned int line,
-			      handle_t *handle, struct super_block *sb)
-{
-	struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
-	int err = 0;
-
-	ext4_superblock_csum_set(sb);
-	if (ext4_handle_valid(handle)) {
-		err = jbd2_journal_dirty_metadata(handle, bh);
-		if (err)
-			ext4_journal_abort_handle(where, line, __func__,
-						  bh, handle, err);
-	} else
-		mark_buffer_dirty(bh);
-	return err;
-}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index a124c68b0c75e10938578dab8f257c09e70cff86..0d2fa423b7adbea8e4e12eec74552ee71884b861 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -244,9 +244,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 				 handle_t *handle, struct inode *inode,
 				 struct buffer_head *bh);
 
-int __ext4_handle_dirty_super(const char *where, unsigned int line,
-			      handle_t *handle, struct super_block *sb);
-
 #define ext4_journal_get_write_access(handle, bh) \
 	__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
 #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
@@ -257,8 +254,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
 #define ext4_handle_dirty_metadata(handle, inode, bh) \
 	__ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
 				     (bh))
-#define ext4_handle_dirty_super(handle, sb) \
-	__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
 
 handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
 				  int type, int blocks, int rsv_blocks,
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 4fcc21c25e79399c8b3ae9069d93d063e78d50d2..0a14a7c87bf82e3b20748cc627fb40d03dad6268 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -604,13 +604,13 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
 	trace_ext4_fc_track_range(inode, start, end, ret);
 }
 
-static void ext4_fc_submit_bh(struct super_block *sb)
+static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
 {
 	int write_flags = REQ_SYNC;
 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
 
-	/* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
-	if (test_opt(sb, BARRIER))
+	/* Add REQ_FUA | REQ_PREFLUSH only its tail */
+	if (test_opt(sb, BARRIER) && is_tail)
 		write_flags |= REQ_FUA | REQ_PREFLUSH;
 	lock_buffer(bh);
 	set_buffer_dirty(bh);
@@ -684,7 +684,7 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
 		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
 	if (pad_len > 0)
 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
-	ext4_fc_submit_bh(sb);
+	ext4_fc_submit_bh(sb, false);
 
 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
 	if (ret)
@@ -741,7 +741,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
 	tail.fc_crc = cpu_to_le32(crc);
 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
 
-	ext4_fc_submit_bh(sb);
+	ext4_fc_submit_bh(sb, true);
 
 	return 0;
 }
@@ -1268,7 +1268,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
-				&sbi->s_fc_q[FC_Q_STAGING]);
+				&sbi->s_fc_q[FC_Q_MAIN]);
 
 	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
 	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
@@ -1318,14 +1318,14 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
 	entry.len = darg.dname_len;
 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
 
-	if (IS_ERR_OR_NULL(inode)) {
+	if (IS_ERR(inode)) {
 		jbd_debug(1, "Inode %d not found", darg.ino);
 		return 0;
 	}
 
 	old_parent = ext4_iget(sb, darg.parent_ino,
 				EXT4_IGET_NORMAL);
-	if (IS_ERR_OR_NULL(old_parent)) {
+	if (IS_ERR(old_parent)) {
 		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
 		iput(inode);
 		return 0;
@@ -1410,7 +1410,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl)
 			darg.parent_ino, darg.dname_len);
 
 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
-	if (IS_ERR_OR_NULL(inode)) {
+	if (IS_ERR(inode)) {
 		jbd_debug(1, "Inode not found.");
 		return 0;
 	}
@@ -1466,10 +1466,11 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
 
 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
-	if (!IS_ERR_OR_NULL(inode)) {
+	if (!IS_ERR(inode)) {
 		ext4_ext_clear_bb(inode);
 		iput(inode);
 	}
+	inode = NULL;
 
 	ext4_fc_record_modified_inode(sb, ino);
 
@@ -1512,7 +1513,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
 
 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
-	if (IS_ERR_OR_NULL(inode)) {
+	if (IS_ERR(inode)) {
 		jbd_debug(1, "Inode not found.");
 		return -EFSCORRUPTED;
 	}
@@ -1564,7 +1565,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
 		goto out;
 
 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
-	if (IS_ERR_OR_NULL(inode)) {
+	if (IS_ERR(inode)) {
 		jbd_debug(1, "inode %d not found.", darg.ino);
 		inode = NULL;
 		ret = -EINVAL;
@@ -1577,7 +1578,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
 		 * dot and dot dot dirents are setup properly.
 		 */
 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
-		if (IS_ERR_OR_NULL(dir)) {
+		if (IS_ERR(dir)) {
 			jbd_debug(1, "Dir %d not found.", darg.ino);
 			goto out;
 		}
@@ -1653,7 +1654,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 
 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
 				EXT4_IGET_NORMAL);
-	if (IS_ERR_OR_NULL(inode)) {
+	if (IS_ERR(inode)) {
 		jbd_debug(1, "Inode not found.");
 		return 0;
 	}
@@ -1777,7 +1778,7 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
 		le32_to_cpu(lrange->fc_ino), cur, remaining);
 
 	inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
-	if (IS_ERR_OR_NULL(inode)) {
+	if (IS_ERR(inode)) {
 		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
 		return 0;
 	}
@@ -1832,7 +1833,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
 			EXT4_IGET_NORMAL);
-		if (IS_ERR_OR_NULL(inode)) {
+		if (IS_ERR(inode)) {
 			jbd_debug(1, "Inode %d not found.",
 				state->fc_modified_inodes[i]);
 			continue;
@@ -1849,7 +1850,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
 
 			if (ret > 0) {
 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
-				if (!IS_ERR_OR_NULL(path)) {
+				if (!IS_ERR(path)) {
 					for (j = 0; j < path->p_depth; j++)
 						ext4_mb_mark_bb(inode->i_sb,
 							path[j].p_block, 1, 1);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3ed8c048fb12c547775850ba62d364bc08362c20..349b27f0dda0cba64cc10575bb233b9b52de2bd4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -809,9 +809,12 @@ static int ext4_sample_last_mounted(struct super_block *sb,
 	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 	if (err)
 		goto out_journal;
-	strlcpy(sbi->s_es->s_last_mounted, cp,
+	lock_buffer(sbi->s_sbh);
+	strncpy(sbi->s_es->s_last_mounted, cp,
 		sizeof(sbi->s_es->s_last_mounted));
-	ext4_handle_dirty_super(handle, sb);
+	ext4_superblock_csum_set(sb);
+	unlock_buffer(sbi->s_sbh);
+	ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
 out_journal:
 	ext4_journal_stop(handle);
 out:
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 27946882d4ce45b7b544bce1b28a26eef006cd64..c173c840585618b0be404937154dd8516eccf60a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5150,9 +5150,13 @@ static int ext4_do_update_inode(handle_t *handle,
 		err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
 		if (err)
 			goto out_brelse;
+		lock_buffer(EXT4_SB(sb)->s_sbh);
 		ext4_set_feature_large_file(sb);
+		ext4_superblock_csum_set(sb);
+		unlock_buffer(EXT4_SB(sb)->s_sbh);
 		ext4_handle_sync(handle);
-		err = ext4_handle_dirty_super(handle, sb);
+		err = ext4_handle_dirty_metadata(handle, NULL,
+						 EXT4_SB(sb)->s_sbh);
 	}
 	ext4_update_inode_fsync_trans(handle, inode, need_datasync);
 out_brelse:
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 524e134324475e4fffd769b30e5b13bc5cb258f9..d9665d2f82db845175e290b70d1ea7cb26f9b938 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1157,7 +1157,10 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 			if (err)
 				goto pwsalt_err_journal;
+			lock_buffer(sbi->s_sbh);
 			generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
+			ext4_superblock_csum_set(sb);
+			unlock_buffer(sbi->s_sbh);
 			err = ext4_handle_dirty_metadata(handle, NULL,
 							 sbi->s_sbh);
 		pwsalt_err_journal:
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b17a082b7db15298e99813cb22d07fd4d0c2e458..cf652ba3e74d29f122b960bab6dc2aa93bb4945c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2976,14 +2976,17 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
 		/* Insert this inode at the head of the on-disk orphan list */
 		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
+		lock_buffer(sbi->s_sbh);
 		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+		ext4_superblock_csum_set(sb);
+		unlock_buffer(sbi->s_sbh);
 		dirty = true;
 	}
 	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
 	mutex_unlock(&sbi->s_orphan_lock);
 
 	if (dirty) {
-		err = ext4_handle_dirty_super(handle, sb);
+		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
 		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
 		if (!err)
 			err = rc;
@@ -3059,9 +3062,12 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 			mutex_unlock(&sbi->s_orphan_lock);
 			goto out_brelse;
 		}
+		lock_buffer(sbi->s_sbh);
 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
+		ext4_superblock_csum_set(inode->i_sb);
+		unlock_buffer(sbi->s_sbh);
 		mutex_unlock(&sbi->s_orphan_lock);
-		err = ext4_handle_dirty_super(handle, inode->i_sb);
+		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
 	} else {
 		struct ext4_iloc iloc2;
 		struct inode *i_prev =
@@ -3593,9 +3599,6 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
 			return retval2;
 		}
 	}
-	brelse(ent->bh);
-	ent->bh = NULL;
-
 	return retval;
 }
 
@@ -3794,6 +3797,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		}
 	}
 
+	old_file_type = old.de->file_type;
 	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
 		ext4_handle_sync(handle);
 
@@ -3821,7 +3825,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	force_reread = (new.dir->i_ino == old.dir->i_ino &&
 			ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
 
-	old_file_type = old.de->file_type;
 	if (whiteout) {
 		/*
 		 * Do this before adding a new entry, so the old entry is sure
@@ -3919,15 +3922,19 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	retval = 0;
 
 end_rename:
-	brelse(old.dir_bh);
-	brelse(old.bh);
-	brelse(new.bh);
 	if (whiteout) {
-		if (retval)
+		if (retval) {
+			ext4_setent(handle, &old,
+				old.inode->i_ino, old_file_type);
 			drop_nlink(whiteout);
+		}
 		unlock_new_inode(whiteout);
 		iput(whiteout);
+
 	}
+	brelse(old.dir_bh);
+	brelse(old.bh);
+	brelse(new.bh);
 	if (handle)
 		ext4_journal_stop(handle);
 	return retval;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 928700d57eb67e5bc01340af2dd8826a6e9c6718..bd0d185654f3357cdc7a5826a6d3afa87c052dff 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -899,8 +899,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	EXT4_SB(sb)->s_gdb_count++;
 	ext4_kvfree_array_rcu(o_group_desc);
 
+	lock_buffer(EXT4_SB(sb)->s_sbh);
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
-	err = ext4_handle_dirty_super(handle, sb);
+	ext4_superblock_csum_set(sb);
+	unlock_buffer(EXT4_SB(sb)->s_sbh);
+	err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	if (err)
 		ext4_std_error(sb, err);
 	return err;
@@ -1384,6 +1387,7 @@ static void ext4_update_super(struct super_block *sb,
 	reserved_blocks *= blocks_count;
 	do_div(reserved_blocks, 100);
 
+	lock_buffer(sbi->s_sbh);
 	ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
 	ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
 	le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
@@ -1421,6 +1425,8 @@ static void ext4_update_super(struct super_block *sb,
 	 * active. */
 	ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
 				reserved_blocks);
+	ext4_superblock_csum_set(sb);
+	unlock_buffer(sbi->s_sbh);
 
 	/* Update the free space counts */
 	percpu_counter_add(&sbi->s_freeclusters_counter,
@@ -1515,7 +1521,7 @@ static int ext4_flex_group_add(struct super_block *sb,
 
 	ext4_update_super(sb, flex_gd);
 
-	err = ext4_handle_dirty_super(handle, sb);
+	err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
 
 exit_journal:
 	err2 = ext4_journal_stop(handle);
@@ -1717,15 +1723,18 @@ static int ext4_group_extend_no_check(struct super_block *sb,
 		goto errout;
 	}
 
+	lock_buffer(EXT4_SB(sb)->s_sbh);
 	ext4_blocks_count_set(es, o_blocks_count + add);
 	ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
+	ext4_superblock_csum_set(sb);
+	unlock_buffer(EXT4_SB(sb)->s_sbh);
 	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	/* We add the blocks to the bitmap and set the group need init bit */
 	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
 	if (err)
 		goto errout;
-	ext4_handle_dirty_super(handle, sb);
+	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 errout:
@@ -1874,12 +1883,15 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
 	if (err)
 		goto errout;
 
+	lock_buffer(sbi->s_sbh);
 	ext4_clear_feature_resize_inode(sb);
 	ext4_set_feature_meta_bg(sb);
 	sbi->s_es->s_first_meta_bg =
 		cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count));
+	ext4_superblock_csum_set(sb);
+	unlock_buffer(sbi->s_sbh);
 
-	err = ext4_handle_dirty_super(handle, sb);
+	err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
 	if (err) {
 		ext4_std_error(sb, err);
 		goto errout;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 21121787c874e186ed547177adcffdebbd3891a6..9a6f9875aa3499fb7ac3a48a062f933b162aa2b9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -65,7 +65,8 @@ static struct ratelimit_state ext4_mount_msg_ratelimit;
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
-static int ext4_commit_super(struct super_block *sb, int sync);
+static void ext4_update_super(struct super_block *sb);
+static int ext4_commit_super(struct super_block *sb);
 static int ext4_mark_recovery_complete(struct super_block *sb,
 					struct ext4_super_block *es);
 static int ext4_clear_journal_err(struct super_block *sb,
@@ -586,15 +587,12 @@ static int ext4_errno_to_code(int errno)
 	return EXT4_ERR_UNKNOWN;
 }
 
-static void __save_error_info(struct super_block *sb, int error,
-			      __u32 ino, __u64 block,
-			      const char *func, unsigned int line)
+static void save_error_info(struct super_block *sb, int error,
+			    __u32 ino, __u64 block,
+			    const char *func, unsigned int line)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-	if (bdev_read_only(sb->s_bdev))
-		return;
 	/* We default to EFSCORRUPTED error... */
 	if (error == 0)
 		error = EFSCORRUPTED;
@@ -618,15 +616,6 @@ static void __save_error_info(struct super_block *sb, int error,
 	spin_unlock(&sbi->s_error_lock);
 }
 
-static void save_error_info(struct super_block *sb, int error,
-			    __u32 ino, __u64 block,
-			    const char *func, unsigned int line)
-{
-	__save_error_info(sb, error, ino, block, func, line);
-	if (!bdev_read_only(sb->s_bdev))
-		ext4_commit_super(sb, 1);
-}
-
 /* Deal with the reporting of failure conditions on a filesystem such as
  * inconsistencies detected or read IO failures.
  *
@@ -647,19 +636,40 @@ static void save_error_info(struct super_block *sb, int error,
  * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
  * at a critical moment in log management.
  */
-static void ext4_handle_error(struct super_block *sb, bool force_ro)
+static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
+			      __u32 ino, __u64 block,
+			      const char *func, unsigned int line)
 {
 	journal_t *journal = EXT4_SB(sb)->s_journal;
+	bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
 
+	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 	if (test_opt(sb, WARN_ON_ERROR))
 		WARN_ON_ONCE(1);
 
-	if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
+	if (!continue_fs && !sb_rdonly(sb)) {
+		ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+		if (journal)
+			jbd2_journal_abort(journal, -EIO);
+	}
+
+	if (!bdev_read_only(sb->s_bdev)) {
+		save_error_info(sb, error, ino, block, func, line);
+		/*
+		 * In case the fs should keep running, we need to writeout
+		 * superblock through the journal. Due to lock ordering
+		 * constraints, it may not be safe to do it right here so we
+		 * defer superblock flushing to a workqueue.
+		 */
+		if (continue_fs)
+			schedule_work(&EXT4_SB(sb)->s_error_work);
+		else
+			ext4_commit_super(sb);
+	}
+
+	if (sb_rdonly(sb) || continue_fs)
 		return;
 
-	ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
-	if (journal)
-		jbd2_journal_abort(journal, -EIO);
 	/*
 	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
 	 * could panic during 'reboot -f' as the underlying device got already
@@ -682,8 +692,39 @@ static void flush_stashed_error_work(struct work_struct *work)
 {
 	struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
 						s_error_work);
+	journal_t *journal = sbi->s_journal;
+	handle_t *handle;
 
-	ext4_commit_super(sbi->s_sb, 1);
+	/*
+	 * If the journal is still running, we have to write out superblock
+	 * through the journal to avoid collisions of other journalled sb
+	 * updates.
+	 *
+	 * We use directly jbd2 functions here to avoid recursing back into
+	 * ext4 error handling code during handling of previous errors.
+	 */
+	if (!sb_rdonly(sbi->s_sb) && journal) {
+		handle = jbd2_journal_start(journal, 1);
+		if (IS_ERR(handle))
+			goto write_directly;
+		if (jbd2_journal_get_write_access(handle, sbi->s_sbh)) {
+			jbd2_journal_stop(handle);
+			goto write_directly;
+		}
+		ext4_update_super(sbi->s_sb);
+		if (jbd2_journal_dirty_metadata(handle, sbi->s_sbh)) {
+			jbd2_journal_stop(handle);
+			goto write_directly;
+		}
+		jbd2_journal_stop(handle);
+		return;
+	}
+write_directly:
+	/*
+	 * Write through journal failed. Write sb directly to get error info
+	 * out and hope for the best.
+	 */
+	ext4_commit_super(sbi->s_sb);
 }
 
 #define ext4_error_ratelimit(sb)					\
@@ -710,8 +751,7 @@ void __ext4_error(struct super_block *sb, const char *function,
 		       sb->s_id, function, line, current->comm, &vaf);
 		va_end(args);
 	}
-	save_error_info(sb, error, 0, block, function, line);
-	ext4_handle_error(sb, force_ro);
+	ext4_handle_error(sb, force_ro, error, 0, block, function, line);
 }
 
 void __ext4_error_inode(struct inode *inode, const char *function,
@@ -741,9 +781,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
 			       current->comm, &vaf);
 		va_end(args);
 	}
-	save_error_info(inode->i_sb, error, inode->i_ino, block,
-			function, line);
-	ext4_handle_error(inode->i_sb, false);
+	ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
+			  function, line);
 }
 
 void __ext4_error_file(struct file *file, const char *function,
@@ -780,9 +819,8 @@ void __ext4_error_file(struct file *file, const char *function,
 			       current->comm, path, &vaf);
 		va_end(args);
 	}
-	save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
-			function, line);
-	ext4_handle_error(inode->i_sb, false);
+	ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
+			  function, line);
 }
 
 const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -849,8 +887,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
 		       sb->s_id, function, line, errstr);
 	}
 
-	save_error_info(sb, -errno, 0, 0, function, line);
-	ext4_handle_error(sb, false);
+	ext4_handle_error(sb, false, -errno, 0, 0, function, line);
 }
 
 void __ext4_msg(struct super_block *sb,
@@ -944,13 +981,16 @@ __acquires(bitlock)
 	if (test_opt(sb, ERRORS_CONT)) {
 		if (test_opt(sb, WARN_ON_ERROR))
 			WARN_ON_ONCE(1);
-		__save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
-		schedule_work(&EXT4_SB(sb)->s_error_work);
+		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+		if (!bdev_read_only(sb->s_bdev)) {
+			save_error_info(sb, EFSCORRUPTED, ino, block, function,
+					line);
+			schedule_work(&EXT4_SB(sb)->s_error_work);
+		}
 		return;
 	}
 	ext4_unlock_group(sb, grp);
-	save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
-	ext4_handle_error(sb, false);
+	ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
 	/*
 	 * We only get here in the ERRORS_RO case; relocking the group
 	 * may be dangerous, but nothing bad will happen since the
@@ -1152,7 +1192,7 @@ static void ext4_put_super(struct super_block *sb)
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
 	}
 	if (!sb_rdonly(sb))
-		ext4_commit_super(sb, 1);
+		ext4_commit_super(sb);
 
 	rcu_read_lock();
 	group_desc = rcu_dereference(sbi->s_group_desc);
@@ -2642,7 +2682,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	if (sbi->s_journal)
 		ext4_set_feature_journal_needs_recovery(sb);
 
-	err = ext4_commit_super(sb, 1);
+	err = ext4_commit_super(sb);
 done:
 	if (test_opt(sb, DEBUG))
 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
@@ -4868,7 +4908,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
 	    !ext4_has_feature_encrypt(sb)) {
 		ext4_set_feature_encrypt(sb);
-		ext4_commit_super(sb, 1);
+		ext4_commit_super(sb);
 	}
 
 	/*
@@ -5418,7 +5458,7 @@ static int ext4_load_journal(struct super_block *sb,
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
 
 		/* Make sure we flush the recovery flag to disk. */
-		ext4_commit_super(sb, 1);
+		ext4_commit_super(sb);
 	}
 
 	return 0;
@@ -5428,16 +5468,14 @@ static int ext4_load_journal(struct super_block *sb,
 	return err;
 }
 
-static int ext4_commit_super(struct super_block *sb, int sync)
+/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
+static void ext4_update_super(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
-	int error = 0;
-
-	if (!sbh || block_device_ejected(sb))
-		return error;
+	struct ext4_super_block *es = sbi->s_es;
+	struct buffer_head *sbh = sbi->s_sbh;
 
+	lock_buffer(sbh);
 	/*
 	 * If the file system is mounted read-only, don't update the
 	 * superblock write time.  This avoids updating the superblock
@@ -5451,17 +5489,17 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	if (!(sb->s_flags & SB_RDONLY))
 		ext4_update_tstamp(es, s_wtime);
 	es->s_kbytes_written =
-		cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
+		cpu_to_le64(sbi->s_kbytes_written +
 		    ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
-		      EXT4_SB(sb)->s_sectors_written_start) >> 1));
-	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
+		      sbi->s_sectors_written_start) >> 1));
+	if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
 		ext4_free_blocks_count_set(es,
-			EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
-				&EXT4_SB(sb)->s_freeclusters_counter)));
-	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+			EXT4_C2B(sbi, percpu_counter_sum_positive(
+				&sbi->s_freeclusters_counter)));
+	if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
 		es->s_free_inodes_count =
 			cpu_to_le32(percpu_counter_sum_positive(
-				&EXT4_SB(sb)->s_freeinodes_counter));
+				&sbi->s_freeinodes_counter));
 	/* Copy error information to the on-disk superblock */
 	spin_lock(&sbi->s_error_lock);
 	if (sbi->s_add_error_count > 0) {
@@ -5502,10 +5540,20 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	}
 	spin_unlock(&sbi->s_error_lock);
 
-	BUFFER_TRACE(sbh, "marking dirty");
 	ext4_superblock_csum_set(sb);
-	if (sync)
-		lock_buffer(sbh);
+	unlock_buffer(sbh);
+}
+
+static int ext4_commit_super(struct super_block *sb)
+{
+	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
+	int error = 0;
+
+	if (!sbh || block_device_ejected(sb))
+		return error;
+
+	ext4_update_super(sb);
+
 	if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
 		/*
 		 * Oh, dear.  A previous attempt to write the
@@ -5520,17 +5568,15 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
 	}
+	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
-	if (sync) {
-		unlock_buffer(sbh);
-		error = __sync_dirty_buffer(sbh,
-			REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
-		if (buffer_write_io_error(sbh)) {
-			ext4_msg(sb, KERN_ERR, "I/O error while writing "
-			       "superblock");
-			clear_buffer_write_io_error(sbh);
-			set_buffer_uptodate(sbh);
-		}
+	error = __sync_dirty_buffer(sbh,
+		REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
+	if (buffer_write_io_error(sbh)) {
+		ext4_msg(sb, KERN_ERR, "I/O error while writing "
+		       "superblock");
+		clear_buffer_write_io_error(sbh);
+		set_buffer_uptodate(sbh);
 	}
 	return error;
 }
@@ -5561,7 +5607,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
 
 	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
 		ext4_clear_feature_journal_needs_recovery(sb);
-		ext4_commit_super(sb, 1);
+		ext4_commit_super(sb);
 	}
 out:
 	jbd2_journal_unlock_updates(journal);
@@ -5603,7 +5649,7 @@ static int ext4_clear_journal_err(struct super_block *sb,
 
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super(sb, 1);
+		ext4_commit_super(sb);
 
 		jbd2_journal_clear_err(journal);
 		jbd2_journal_update_sb_errno(journal);
@@ -5705,7 +5751,7 @@ static int ext4_freeze(struct super_block *sb)
 		ext4_clear_feature_journal_needs_recovery(sb);
 	}
 
-	error = ext4_commit_super(sb, 1);
+	error = ext4_commit_super(sb);
 out:
 	if (journal)
 		/* we rely on upper layer to stop further updates */
@@ -5727,7 +5773,7 @@ static int ext4_unfreeze(struct super_block *sb)
 		ext4_set_feature_journal_needs_recovery(sb);
 	}
 
-	ext4_commit_super(sb, 1);
+	ext4_commit_super(sb);
 	return 0;
 }
 
@@ -5987,7 +6033,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	}
 
 	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
-		err = ext4_commit_super(sb, 1);
+		err = ext4_commit_super(sb);
 		if (err)
 			goto restore_opts;
 	}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 4e3b1f8c2e81eaaf5489ee96f2c63a5d6e8681da..372208500f4e766f0167ce4597f777dafc6891f2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -792,8 +792,11 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 
 	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
 	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
+		lock_buffer(EXT4_SB(sb)->s_sbh);
 		ext4_set_feature_xattr(sb);
-		ext4_handle_dirty_super(handle, sb);
+		ext4_superblock_csum_set(sb);
+		unlock_buffer(EXT4_SB(sb)->s_sbh);
+		ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	}
 }
 
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2f305c097bd5bb20cec05e633b201e07b99ade29..985a9e3f976d3bf1e8b6da3aebbb6495fe875dc9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -354,6 +354,7 @@ struct io_ring_ctx {
 		unsigned		cq_entries;
 		unsigned		cq_mask;
 		atomic_t		cq_timeouts;
+		unsigned		cq_last_tm_flush;
 		unsigned long		cq_check_overflow;
 		struct wait_queue_head	cq_wait;
 		struct fasync_struct	*cq_fasync;
@@ -1106,6 +1107,9 @@ static void io_sq_thread_drop_mm_files(void)
 
 static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx)
 {
+	if (current->flags & PF_EXITING)
+		return -EFAULT;
+
 	if (!current->files) {
 		struct files_struct *files;
 		struct nsproxy *nsproxy;
@@ -1133,6 +1137,8 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
 {
 	struct mm_struct *mm;
 
+	if (current->flags & PF_EXITING)
+		return -EFAULT;
 	if (current->mm)
 		return 0;
 
@@ -1634,19 +1640,38 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
 
 static void io_flush_timeouts(struct io_ring_ctx *ctx)
 {
-	while (!list_empty(&ctx->timeout_list)) {
+	u32 seq;
+
+	if (list_empty(&ctx->timeout_list))
+		return;
+
+	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+
+	do {
+		u32 events_needed, events_got;
 		struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
 						struct io_kiocb, timeout.list);
 
 		if (io_is_timeout_noseq(req))
 			break;
-		if (req->timeout.target_seq != ctx->cached_cq_tail
-					- atomic_read(&ctx->cq_timeouts))
+
+		/*
+		 * Since seq can easily wrap around over time, subtract
+		 * the last seq at which timeouts were flushed before comparing.
+		 * Assuming not more than 2^31-1 events have happened since,
+		 * these subtractions won't have wrapped, so we can check if
+		 * target is in [last_seq, current_seq] by comparing the two.
+		 */
+		events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush;
+		events_got = seq - ctx->cq_last_tm_flush;
+		if (events_got < events_needed)
 			break;
 
 		list_del_init(&req->timeout.list);
 		io_kill_timeout(req);
-	}
+	} while (!list_empty(&ctx->timeout_list));
+
+	ctx->cq_last_tm_flush = seq;
 }
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
@@ -5832,6 +5857,12 @@ static int io_timeout(struct io_kiocb *req)
 	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
 	req->timeout.target_seq = tail + off;
 
+	/* Update the last seq here in case io_flush_timeouts() hasn't.
+	 * This is safe because ->completion_lock is held, and submissions
+	 * and completions are never mixed in the same ->completion_lock section.
+	 */
+	ctx->cq_last_tm_flush = tail;
+
 	/*
 	 * Insertion sort, ensuring the first entry in the list is always
 	 * the one we need first.
@@ -7056,6 +7087,7 @@ static int io_sq_thread(void *data)
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
 			io_run_task_work();
+			io_sq_thread_drop_mm_files();
 			cond_resched();
 			if (sqt_spin)
 				timeout = jiffies + sqd->sq_thread_idle;
@@ -7093,6 +7125,7 @@ static int io_sq_thread(void *data)
 	}
 
 	io_run_task_work();
+	io_sq_thread_drop_mm_files();
 
 	if (cur_css)
 		io_sq_thread_unassociate_blkcg();
@@ -8888,7 +8921,8 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 
 	/* make sure callers enter the ring to get error */
-	io_ring_set_wakeup_flag(ctx);
+	if (ctx->rings)
+		io_ring_set_wakeup_flag(ctx);
 }
 
 /*
@@ -9067,6 +9101,7 @@ void __io_uring_task_cancel(void)
 		finish_wait(&tctx->wait, &wait);
 	} while (1);
 
+	finish_wait(&tctx->wait, &wait);
 	atomic_dec(&tctx->in_idle);
 
 	io_uring_remove_task_files(tctx);
@@ -9700,6 +9735,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	 */
 	ret = io_uring_install_fd(ctx, file);
 	if (ret < 0) {
+		io_disable_sqo_submit(ctx);
 		/* fput will clean it up */
 		fput(file);
 		return ret;
diff --git a/fs/namespace.c b/fs/namespace.c
index d2db7dfe232b3ecd299187ed477ad2fd7a0fd1a5..9d33909d0f9e34d8a54c7d0d1d7b44652cc8b69e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1713,8 +1713,6 @@ static int can_umount(const struct path *path, int flags)
 {
 	struct mount *mnt = real_mount(path->mnt);
 
-	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
-		return -EINVAL;
 	if (!may_mount())
 		return -EPERM;
 	if (path->dentry != path->mnt->mnt_root)
@@ -1728,6 +1726,7 @@ static int can_umount(const struct path *path, int flags)
 	return 0;
 }
 
+// caller is responsible for flags being sane
 int path_umount(struct path *path, int flags)
 {
 	struct mount *mnt = real_mount(path->mnt);
@@ -1749,6 +1748,10 @@ static int ksys_umount(char __user *name, int flags)
 	struct path path;
 	int ret;
 
+	// basic validity checks done first
+	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
+		return -EINVAL;
+
 	if (!(flags & UMOUNT_NOFOLLOW))
 		lookup_flags |= LOOKUP_FOLLOW;
 	ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 821db21ba072cd7dbf7a70c69bd8f0ee378758f6..34b880211e5eab8e63fd3d034a9a3b6aa8b4fa40 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -865,9 +865,14 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
 	if (isdotent(name, namlen)) {
 		if (namlen == 2) {
 			dchild = dget_parent(dparent);
-			/* filesystem root - cannot return filehandle for ".." */
+			/*
+			 * Don't return filehandle for ".." if we're at
+			 * the filesystem or export root:
+			 */
 			if (dchild == dparent)
 				goto out;
+			if (dparent == exp->ex_path.dentry)
+				goto out;
 		} else
 			dchild = dget(dparent);
 	} else
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ee5a235b30562721d9800dbfafc3b958d51d8135..602e3a52884d883c639420e426b539fd74ef35c5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1035,6 +1035,25 @@ struct clear_refs_private {
 };
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
+
+#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
+
+static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+	struct page *page;
+
+	if (!pte_write(pte))
+		return false;
+	if (!is_cow_mapping(vma->vm_flags))
+		return false;
+	if (likely(!atomic_read(&vma->vm_mm->has_pinned)))
+		return false;
+	page = vm_normal_page(vma, addr, pte);
+	if (!page)
+		return false;
+	return page_maybe_dma_pinned(page);
+}
+
 static inline void clear_soft_dirty(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *pte)
 {
@@ -1049,6 +1068,8 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	if (pte_present(ptent)) {
 		pte_t old_pte;
 
+		if (pte_is_pinned(vma, addr, ptent))
+			return;
 		old_pte = ptep_modify_prot_start(vma, addr, pte);
 		ptent = pte_wrprotect(old_pte);
 		ptent = pte_clear_soft_dirty(ptent);
@@ -1215,41 +1236,26 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			.type = type,
 		};
 
+		if (mmap_write_lock_killable(mm)) {
+			count = -EINTR;
+			goto out_mm;
+		}
 		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
-			if (mmap_write_lock_killable(mm)) {
-				count = -EINTR;
-				goto out_mm;
-			}
-
 			/*
 			 * Writing 5 to /proc/pid/clear_refs resets the peak
 			 * resident set size to this mm's current rss value.
 			 */
 			reset_mm_hiwater_rss(mm);
-			mmap_write_unlock(mm);
-			goto out_mm;
+			goto out_unlock;
 		}
 
-		if (mmap_read_lock_killable(mm)) {
-			count = -EINTR;
-			goto out_mm;
-		}
 		tlb_gather_mmu(&tlb, mm, 0, -1);
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			for (vma = mm->mmap; vma; vma = vma->vm_next) {
 				if (!(vma->vm_flags & VM_SOFTDIRTY))
 					continue;
-				mmap_read_unlock(mm);
-				if (mmap_write_lock_killable(mm)) {
-					count = -EINTR;
-					goto out_mm;
-				}
-				for (vma = mm->mmap; vma; vma = vma->vm_next) {
-					vma->vm_flags &= ~VM_SOFTDIRTY;
-					vma_set_page_prot(vma);
-				}
-				mmap_write_downgrade(mm);
-				break;
+				vma->vm_flags &= ~VM_SOFTDIRTY;
+				vma_set_page_prot(vma);
 			}
 
 			mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
@@ -1261,7 +1267,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		if (type == CLEAR_REFS_SOFT_DIRTY)
 			mmu_notifier_invalidate_range_end(&range);
 		tlb_finish_mmu(&tlb, 0, -1);
-		mmap_read_unlock(mm);
+out_unlock:
+		mmap_write_unlock(mm);
 out_mm:
 		mmput(mm);
 	}
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index dd90c9792909d1db39c2a8a52cc78216a034f1ba..0e7316a86240b8c309d6b49ba0d8c1663d2159e5 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -11,19 +11,19 @@
  * See Documentation/atomic_bitops.txt for details.
  */
 
-static inline void set_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline void set_bit(unsigned int nr, volatile unsigned long *p)
 {
 	p += BIT_WORD(nr);
 	atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
 }
 
-static inline void clear_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline void clear_bit(unsigned int nr, volatile unsigned long *p)
 {
 	p += BIT_WORD(nr);
 	atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p);
 }
 
-static inline void change_bit(unsigned int nr, volatile unsigned long *p)
+static __always_inline void change_bit(unsigned int nr, volatile unsigned long *p)
 {
 	p += BIT_WORD(nr);
 	atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p);
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 74c6c0486eed782e267b1addd65669f1422bad65..555ab0fddbef7dc937a178e49621598736408c05 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -13,6 +13,12 @@
 /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */
 #if GCC_VERSION < 40900
 # error Sorry, your version of GCC is too old - please use 4.9 or newer.
+#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100
+/*
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293
+ * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk
+ */
+# error Sorry, your version of GCC is too old - please use 5.1 or newer.
 #endif
 
 /*
diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h
index 29d255fdd5d641cf31eefefcc3d47744e87734d3..90bd558a17f516780329589ea6a07d62b9909c5a 100644
--- a/include/linux/dm-bufio.h
+++ b/include/linux/dm-bufio.h
@@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n);
 
 unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
+struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c);
 sector_t dm_bufio_get_block_number(struct dm_buffer *b);
 void *dm_bufio_get_block_data(struct dm_buffer *b);
 void *dm_bufio_get_aux_data(struct dm_buffer *b);
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 5e0655fb2a6f7767572f6b82275abfd306a4b386..fe1ae73ff8b574a8ee31e03b07f06cb6d5ca2473 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -35,8 +35,12 @@ struct kunit_kasan_expectation {
 #define KASAN_SHADOW_INIT 0
 #endif
 
+#ifndef PTE_HWTABLE_PTRS
+#define PTE_HWTABLE_PTRS 0
+#endif
+
 extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
-extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE];
+extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS];
 extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD];
 extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD];
 extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h
index 5d71e8a8500f5ed13872c19b01cf7a8af9d62c11..aca4dc037b70b7284e6f997264c4c66d2701d8ac 100644
--- a/include/linux/mdio-bitbang.h
+++ b/include/linux/mdio-bitbang.h
@@ -35,6 +35,9 @@ struct mdiobb_ctrl {
 	const struct mdiobb_ops *ops;
 };
 
+int mdiobb_read(struct mii_bus *bus, int phy, int reg);
+int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val);
+
 /* The returned bus is not yet registered with the phy layer. */
 struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d827bd7f3bfe3a01e3c638ab43fa9673ab0b4cc3..eeb0b52203e929bb0c0e4f869abf395d29d5760d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -665,7 +665,7 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
 {
 	struct mem_cgroup *memcg = page_memcg(page);
 
-	VM_WARN_ON_ONCE_PAGE(!memcg, page);
+	VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
 	return mem_cgroup_lruvec(memcg, pgdat);
 }
 
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index bf7966776c5577fd4a92a8c0837943b4f0aa5c54..505480217cf1a9b4f779b454d42bc78c464e189b 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -163,8 +163,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
 static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
 #endif
 
-bool arm_pmu_irq_is_nmi(void);
-
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
 struct arm_pmu *armpmu_alloc_atomic(void);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 88a7673894d5e624e7ffa51eb3c16d9043080e88..cfbfd6fe01dfad22f81c8090c4a9dfdcb1d6a28a 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -81,6 +81,8 @@ struct usbnet {
 #		define EVENT_LINK_CHANGE	11
 #		define EVENT_SET_RX_MODE	12
 #		define EVENT_NO_IP_ALIGN	13
+	u32			rx_speed;	/* in bps - NOT Mbps */
+	u32			tx_speed;	/* in bps - NOT Mbps */
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9a4bbccddc7f7fff5a86109eef2dd2f831a3b5ad..0d6f7ec860615c6146d677bda5a652ad205c03f2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1756,7 +1756,7 @@ struct cfg80211_sar_specs {
 
 
 /**
- * @struct cfg80211_sar_chan_ranges - sar frequency ranges
+ * struct cfg80211_sar_freq_ranges - sar frequency ranges
  * @start_freq:  start range edge frequency
  * @end_freq:    end range edge frequency
  */
@@ -3972,6 +3972,8 @@ struct mgmt_frame_regs {
  *	This callback may sleep.
  * @reset_tid_config: Reset TID specific configuration for the peer, for the
  *	given TIDs. This callback may sleep.
+ *
+ * @set_sar_specs: Update the SAR (TX power) settings.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4929,6 +4931,7 @@ struct wiphy_iftype_akm_suites {
  * @max_data_retry_count: maximum supported per TID retry count for
  *	configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and
  *	%NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes
+ * @sar_capa: SAR control capabilities
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 7338b3865a2a3d278dc27c0167bba1b966bbda9f..111d7771b208150d4e6f445b2675648b4de0eb7d 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -76,6 +76,8 @@ struct inet_connection_sock_af_ops {
  * @icsk_ext_hdr_len:	   Network protocol overhead (IP/IPv6 options)
  * @icsk_ack:		   Delayed ACK control data
  * @icsk_mtup;		   MTU probing control data
+ * @icsk_probes_tstamp:    Probe timestamp (cleared by non-zero window ack)
+ * @icsk_user_timeout:	   TCP_USER_TIMEOUT value
  */
 struct inet_connection_sock {
 	/* inet_sock has to be the first member! */
@@ -129,6 +131,7 @@ struct inet_connection_sock {
 
 		u32		  probe_timestamp;
 	} icsk_mtup;
+	u32			  icsk_probes_tstamp;
 	u32			  icsk_user_timeout;
 
 	u64			  icsk_ca_priv[104 / sizeof(u64)];
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d315740581f1ebc46de1e4180cc6ad9023cf1897..2bdbf62f4ecd7b4e7e62e45263176733fc5db02a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3880,6 +3880,7 @@ enum ieee80211_reconfig_type {
  *	This callback may sleep.
  * @sta_set_4addr: Called to notify the driver when a station starts/stops using
  *	4-address mode
+ * @set_sar_specs: Update the SAR (TX power) settings.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
diff --git a/include/net/sock.h b/include/net/sock.h
index bdc4323ce53c957c30809b0773b2d558dce555b7..129d200bccb46581f9959a8ee553acbed94466cf 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1921,10 +1921,13 @@ static inline void sk_set_txhash(struct sock *sk)
 	sk->sk_txhash = net_tx_rndhash();
 }
 
-static inline void sk_rethink_txhash(struct sock *sk)
+static inline bool sk_rethink_txhash(struct sock *sk)
 {
-	if (sk->sk_txhash)
+	if (sk->sk_txhash) {
 		sk_set_txhash(sk);
+		return true;
+	}
+	return false;
 }
 
 static inline struct dst_entry *
@@ -1947,12 +1950,10 @@ sk_dst_get(struct sock *sk)
 	return dst;
 }
 
-static inline void dst_negative_advice(struct sock *sk)
+static inline void __dst_negative_advice(struct sock *sk)
 {
 	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
 
-	sk_rethink_txhash(sk);
-
 	if (dst && dst->ops->negative_advice) {
 		ndst = dst->ops->negative_advice(dst);
 
@@ -1964,6 +1965,12 @@ static inline void dst_negative_advice(struct sock *sk)
 	}
 }
 
+static inline void dst_negative_advice(struct sock *sk)
+{
+	sk_rethink_txhash(sk);
+	__dst_negative_advice(sk);
+}
+
 static inline void
 __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 58994e01302216d9b935f6ac3a3bb4e7fb794077..6f89c27265f5895587416a2ff1aae928c18543c0 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1424,13 +1424,61 @@ TRACE_EVENT(rpcb_unregister,
 	)
 );
 
+/* Record an xdr_buf containing a fully-formed RPC message */
+DECLARE_EVENT_CLASS(svc_xdr_msg_class,
+	TP_PROTO(
+		const struct xdr_buf *xdr
+	),
+
+	TP_ARGS(xdr),
+
+	TP_STRUCT__entry(
+		__field(u32, xid)
+		__field(const void *, head_base)
+		__field(size_t, head_len)
+		__field(const void *, tail_base)
+		__field(size_t, tail_len)
+		__field(unsigned int, page_len)
+		__field(unsigned int, msg_len)
+	),
+
+	TP_fast_assign(
+		__be32 *p = (__be32 *)xdr->head[0].iov_base;
+
+		__entry->xid = be32_to_cpu(*p);
+		__entry->head_base = p;
+		__entry->head_len = xdr->head[0].iov_len;
+		__entry->tail_base = xdr->tail[0].iov_base;
+		__entry->tail_len = xdr->tail[0].iov_len;
+		__entry->page_len = xdr->page_len;
+		__entry->msg_len = xdr->len;
+	),
+
+	TP_printk("xid=0x%08x head=[%p,%zu] page=%u tail=[%p,%zu] len=%u",
+		__entry->xid,
+		__entry->head_base, __entry->head_len, __entry->page_len,
+		__entry->tail_base, __entry->tail_len, __entry->msg_len
+	)
+);
+
+#define DEFINE_SVCXDRMSG_EVENT(name)					\
+		DEFINE_EVENT(svc_xdr_msg_class,				\
+				svc_xdr_##name,				\
+				TP_PROTO(				\
+					const struct xdr_buf *xdr	\
+				),					\
+				TP_ARGS(xdr))
+
+DEFINE_SVCXDRMSG_EVENT(recvfrom);
+
+/* Record an xdr_buf containing arbitrary data, tagged with an XID */
 DECLARE_EVENT_CLASS(svc_xdr_buf_class,
 	TP_PROTO(
-		const struct svc_rqst *rqst,
+		__be32 xid,
 		const struct xdr_buf *xdr
 	),
 
-	TP_ARGS(rqst, xdr),
+	TP_ARGS(xid, xdr),
 
 	TP_STRUCT__entry(
 		__field(u32, xid)
@@ -1443,7 +1491,7 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class,
 	),
 
 	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		__entry->xid = be32_to_cpu(xid);
 		__entry->head_base = xdr->head[0].iov_base;
 		__entry->head_len = xdr->head[0].iov_len;
 		__entry->tail_base = xdr->tail[0].iov_base;
@@ -1463,12 +1511,11 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class,
 		DEFINE_EVENT(svc_xdr_buf_class,				\
 				svc_xdr_##name,				\
 				TP_PROTO(				\
-					const struct svc_rqst *rqst,	\
+					__be32 xid,			\
 					const struct xdr_buf *xdr	\
 				),					\
-				TP_ARGS(rqst, xdr))
+				TP_ARGS(xid, xdr))
 
-DEFINE_SVCXDRBUF_EVENT(recvfrom);
 DEFINE_SVCXDRBUF_EVENT(sendto);
 
 /*
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 00c7235ae93e7daae3ce106f76fc5dfce1cd1bdd..2c43b0ef1e4d504a652d0d574d5b6e41fa107091 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -192,7 +192,7 @@ void xs_suspend_cancel(void);
 
 struct work_struct;
 
-void xenbus_probe(struct work_struct *);
+void xenbus_probe(void);
 
 #define XENBUS_IS_ERR_READ(str) ({			\
 	if (!IS_ERR(str) && strlen(str) == 0) {		\
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 6edff97ad594bd5d6e98a8971723a4c07436db1b..2f0597320b6d279871834c2deea3a662736b7373 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -176,14 +176,14 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	 * bpf_local_storage_update expects the owner to have a
 	 * valid storage pointer.
 	 */
-	if (!inode_storage_ptr(inode))
+	if (!inode || !inode_storage_ptr(inode))
 		return (unsigned long)NULL;
 
 	sdata = inode_storage_lookup(inode, map, true);
 	if (sdata)
 		return (unsigned long)sdata->data;
 
-	/* This helper must only called from where the inode is gurranteed
+	/* This helper must only called from where the inode is guaranteed
 	 * to have a refcount and cannot be freed.
 	 */
 	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
@@ -200,7 +200,10 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 BPF_CALL_2(bpf_inode_storage_delete,
 	   struct bpf_map *, map, struct inode *, inode)
 {
-	/* This helper must only called from where the inode is gurranteed
+	if (!inode)
+		return -EINVAL;
+
+	/* This helper must only called from where the inode is guaranteed
 	 * to have a refcount and cannot be freed.
 	 */
 	return inode_storage_delete(inode, map);
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 4ef1959a78f27fcc610b1d31dcc6f4ca489f2f72..e0da0258b732dd318304a6357984ec6f50947f42 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -218,7 +218,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	 * bpf_local_storage_update expects the owner to have a
 	 * valid storage pointer.
 	 */
-	if (!task_storage_ptr(task))
+	if (!task || !task_storage_ptr(task))
 		return (unsigned long)NULL;
 
 	sdata = task_storage_lookup(task, map, true);
@@ -243,6 +243,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 	   task)
 {
+	if (!task)
+		return -EINVAL;
+
 	/* This helper must only be called from places where the lifetime of the task
 	 * is guaranteed. Either by being refcounted or by being protected
 	 * by an RCU read-side critical section.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 7ccc0133723a03d4a4565d3249df3267feb523a4..8962f988514f5f7e473f9d522a5063db9142deb8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4172,7 +4172,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
 		return -ENOTSUPP;
 	}
 
-	if (btf_data_size == hdr->hdr_len) {
+	if (!btf->base_btf && btf_data_size == hdr->hdr_len) {
 		btf_verifier_log(env, "No data");
 		return -EINVAL;
 	}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 6ec088a96302f980ee7e56feaf626ad078d03a77..96555a8a2c545435aee44fd2fe4bbc37532357c5 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1391,12 +1391,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 		if (ctx.optlen != 0) {
 			*optlen = ctx.optlen;
 			*kernel_optval = ctx.optval;
+			/* export and don't free sockopt buf */
+			return 0;
 		}
 	}
 
 out:
-	if (ret)
-		sockopt_free_buf(&ctx);
+	sockopt_free_buf(&ctx);
 	return ret;
 }
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bd8a3183d0302efd7147e84b30054cf9aa6c10e5..41ca280b1dc19428d626ef16ed8db8eab58892df 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -108,7 +108,7 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
 }
 
 const struct bpf_func_proto bpf_map_peek_elem_proto = {
-	.func		= bpf_map_pop_elem,
+	.func		= bpf_map_peek_elem,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c3bb03c8371fc7aabc60245780582a6fed05b12a..e5999d86c76ea17ca49a4cf8851cf51cc20d9a3d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2712,7 +2712,6 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 out_put_prog:
 	if (tgt_prog_fd && tgt_prog)
 		bpf_prog_put(tgt_prog);
-	bpf_prog_put(prog);
 	return err;
 }
 
@@ -2825,7 +2824,10 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 			tp_name = prog->aux->attach_func_name;
 			break;
 		}
-		return bpf_tracing_prog_attach(prog, 0, 0);
+		err = bpf_tracing_prog_attach(prog, 0, 0);
+		if (err >= 0)
+			return err;
+		goto out_put_prog;
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 		if (strncpy_from_user(buf,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0f82d5d46e2cd5f6456ddc88e29db378ee31fd03..785d25392ead7a411be645e810f43c0307377b2f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2217,6 +2217,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_RDWR_BUF:
 	case PTR_TO_RDWR_BUF_OR_NULL:
 	case PTR_TO_PERCPU_BTF_ID:
+	case PTR_TO_MEM:
+	case PTR_TO_MEM_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -5352,7 +5354,7 @@ static bool signed_add_overflows(s64 a, s64 b)
 	return res < a;
 }
 
-static bool signed_add32_overflows(s64 a, s64 b)
+static bool signed_add32_overflows(s32 a, s32 b)
 {
 	/* Do the add in u32, where overflow is well-defined */
 	s32 res = (s32)((u32)a + (u32)b);
@@ -5362,7 +5364,7 @@ static bool signed_add32_overflows(s64 a, s64 b)
 	return res < a;
 }
 
-static bool signed_sub_overflows(s32 a, s32 b)
+static bool signed_sub_overflows(s64 a, s64 b)
 {
 	/* Do the sub in u64, where overflow is well-defined */
 	s64 res = (s64)((u64)a - (u64)b);
@@ -5374,7 +5376,7 @@ static bool signed_sub_overflows(s32 a, s32 b)
 
 static bool signed_sub32_overflows(s32 a, s32 b)
 {
-	/* Do the sub in u64, where overflow is well-defined */
+	/* Do the sub in u32, where overflow is well-defined */
 	s32 res = (s32)((u32)a - (u32)b);
 
 	if (b < 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index 5736c55aaa1afc60d21294c79a0c8ad5d70efc09..6b9c431da08fe92d7d9c13866b4ddb2d767385b8 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2550,6 +2550,9 @@ bool get_signal(struct ksignal *ksig)
 	struct signal_struct *signal = current->signal;
 	int signr;
 
+	if (unlikely(current->task_works))
+		task_work_run();
+
 	/*
 	 * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so
 	 * that the arch handlers don't all have to do it. If we get here
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1635111c5bd2af92ae6683b97040ebf941266a28..a21e6a5792c5a53fcdb3ffd29328d9d5f6d5afad 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1658,7 +1658,7 @@ static int copy_compat_iovec_from_user(struct iovec *iov,
 		(const struct compat_iovec __user *)uvec;
 	int ret = -EFAULT, i;
 
-	if (!user_access_begin(uvec, nr_segs * sizeof(*uvec)))
+	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
 		return -EFAULT;
 
 	for (i = 0; i < nr_segs; i++) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a2602969873dcf6c6bc989bcccb2136bc45348b0..18f6ee3179002a7f218dfafb4e7724685ff1c215 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4371,7 +4371,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		 * So we need to block hugepage fault by PG_hwpoison bit check.
 		 */
 		if (unlikely(PageHWPoison(page))) {
-			ret = VM_FAULT_HWPOISON |
+			ret = VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(hstate_index(h));
 			goto backout_unlocked;
 		}
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index bc0ad208b3a7a3751f00c46f3365fab7528fe949..7ca0b92d5886dca033e6b2a050441fd01b71c048 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -64,7 +64,8 @@ static inline bool kasan_pmd_table(pud_t pud)
 	return false;
 }
 #endif
-pte_t kasan_early_shadow_pte[PTRS_PER_PTE] __page_aligned_bss;
+pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]
+	__page_aligned_bss;
 
 static inline bool kasan_pte_table(pmd_t pmd)
 {
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5a38e9eade946b66b98730ec57ff80efeec93004..04d9f154a130d28f38d26dfb0a897656d747cd71 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1940,7 +1940,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 			goto retry;
 		}
 	} else if (ret == -EIO) {
-		pr_info("%s: %#lx: unknown page type: %lx (%pGP)\n",
+		pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n",
 			 __func__, pfn, page->flags, &page->flags);
 	}
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8cf96bd21341cf30e186c5bb1f893ab267149860..2c3a8650205343c54e451b98e5d7022c2c4b7109 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1111,7 +1111,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 		     const nodemask_t *to, int flags)
 {
 	int busy = 0;
-	int err;
+	int err = 0;
 	nodemask_t tmp;
 
 	migrate_prep();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bdbec4c981738dc4e821564d32abc0d897de29d1..027f6481ba59bca36ff024dbbf62cfa3f7ef04e9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2862,20 +2862,20 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 {
 	struct page *page;
 
-#ifdef CONFIG_CMA
-	/*
-	 * Balance movable allocations between regular and CMA areas by
-	 * allocating from CMA when over half of the zone's free memory
-	 * is in the CMA area.
-	 */
-	if (alloc_flags & ALLOC_CMA &&
-	    zone_page_state(zone, NR_FREE_CMA_PAGES) >
-	    zone_page_state(zone, NR_FREE_PAGES) / 2) {
-		page = __rmqueue_cma_fallback(zone, order);
-		if (page)
-			return page;
+	if (IS_ENABLED(CONFIG_CMA)) {
+		/*
+		 * Balance movable allocations between regular and CMA areas by
+		 * allocating from CMA when over half of the zone's free memory
+		 * is in the CMA area.
+		 */
+		if (alloc_flags & ALLOC_CMA &&
+		    zone_page_state(zone, NR_FREE_CMA_PAGES) >
+		    zone_page_state(zone, NR_FREE_PAGES) / 2) {
+			page = __rmqueue_cma_fallback(zone, order);
+			if (page)
+				goto out;
+		}
 	}
-#endif
 retry:
 	page = __rmqueue_smallest(zone, order, migratetype);
 	if (unlikely(!page)) {
@@ -2886,8 +2886,9 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 								alloc_flags))
 			goto retry;
 	}
-
-	trace_mm_page_alloc_zone_locked(page, order, migratetype);
+out:
+	if (page)
+		trace_mm_page_alloc_zone_locked(page, order, migratetype);
 	return page;
 }
 
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 4bcc119580890753d27d25807a18ab58b520b002..f5fee9cf90f8bb35d81a1424947c9299b09959a0 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/uio.h>
 #include <linux/sched.h>
+#include <linux/compat.h>
 #include <linux/sched/mm.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
diff --git a/mm/slub.c b/mm/slub.c
index dc5b42e700b853eabd668d56fd04f9eb64c5c4d0..d9e4e10683cc129c33c67864e75c926a5a7bf9d6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1973,7 +1973,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 
 		t = acquire_slab(s, n, page, object == NULL, &objects);
 		if (!t)
-			break;
+			continue; /* cmpxchg raced */
 
 		available += objects;
 		if (!object) {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4d88fe5a277ac2d0e520f37f4dc7ec0a779fc9fe..e6f352bf0498248a5f36c991f56c80ac9cf290c3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2420,8 +2420,10 @@ void *vmap(struct page **pages, unsigned int count,
 		return NULL;
 	}
 
-	if (flags & VM_MAP_PUT_PAGES)
+	if (flags & VM_MAP_PUT_PAGES) {
 		area->pages = pages;
+		area->nr_pages = count;
+	}
 	return area->addr;
 }
 EXPORT_SYMBOL(vmap);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 257cba79a96dd024251478235b237f60b048cb70..b1b574ad199d2ca8bde196e73de0a129e8a61e43 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1238,6 +1238,8 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 			if (!PageSwapCache(page)) {
 				if (!(sc->gfp_mask & __GFP_IO))
 					goto keep_locked;
+				if (page_maybe_dma_pinned(page))
+					goto keep_locked;
 				if (PageTransHuge(page)) {
 					/* cannot split THP, skip it */
 					if (!can_split_huge_page(page, NULL))
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 23dfb2010ba69e6fbaa07a99f1ff85adff8a4832..58bcb8c849d5492ba5694ed0ac7b8f0246c089c6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -272,7 +272,8 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 	    kattr->test.repeat)
 		return -EINVAL;
 
-	if (ctx_size_in < prog->aux->max_ctx_offset)
+	if (ctx_size_in < prog->aux->max_ctx_offset ||
+	    ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64))
 		return -EINVAL;
 
 	if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0332f2e8f7da00579af8e1629aa3d04158791b3b..07a0347c33fb0c46f38d9d73250b5fc1060b1b0e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9690,6 +9690,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		}
 	}
 
+	if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
+		netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n");
+		features &= ~NETIF_F_HW_TLS_RX;
+	}
+
 	return features;
 }
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index ee828e4b1007e9809e2495a759b869dafb8daa14..738d4344d6799a0dda3051e265da84fcabc0d86c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4146,7 +4146,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
 static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
 					      struct genl_info *info)
 {
-	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink_port *devlink_port = info->user_ptr[1];
 	struct devlink_param_item *param_item;
 	struct sk_buff *msg;
 	int err;
@@ -4175,7 +4175,7 @@ static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
 static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
 					      struct genl_info *info)
 {
-	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink_port *devlink_port = info->user_ptr[1];
 
 	return __devlink_nl_cmd_param_set_doit(devlink_port->devlink,
 					       devlink_port->index,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 80dbf2f4016e26824bc968115503ca2072933f63..8e582e29a41e39809cc534865bb3c91c05b3d9f2 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -80,11 +80,11 @@ static void est_timer(struct timer_list *t)
 	u64 rate, brate;
 
 	est_fetch_counters(est, &b);
-	brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
-	brate -= (est->avbps >> est->ewma_log);
+	brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
+	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
 
-	rate = (b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
-	rate -= (est->avpps >> est->ewma_log);
+	rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
+	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
 
 	write_seqcount_begin(&est->seq);
 	est->avbps += brate;
@@ -143,6 +143,9 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	if (parm->interval < -2 || parm->interval > 3)
 		return -EINVAL;
 
+	if (parm->ewma_log == 0 || parm->ewma_log >= 31)
+		return -EINVAL;
+
 	est = kzalloc(sizeof(*est), GFP_KERNEL);
 	if (!est)
 		return -ENOBUFS;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cf2c4dcf425790f51625a6a1c8e0bb21ce7db919..145503d3f06b342ff937ce9690da77fbabcb07c8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -437,7 +437,11 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 
 	len += NET_SKB_PAD;
 
-	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+	/* If requested length is either too small or too big,
+	 * we use kmalloc() for skb->head allocation.
+	 */
+	if (len <= SKB_WITH_OVERHEAD(1024) ||
+	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
 	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 		if (!skb)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fd8b8800a2c3022666f46b9ba2ac984f7cf6b04d..6bd7ca09af03dd5385096f749cf05afecb4b7795 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -851,6 +851,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		newicsk->icsk_retransmits = 0;
 		newicsk->icsk_backoff	  = 0;
 		newicsk->icsk_probes_out  = 0;
+		newicsk->icsk_probes_tstamp = 0;
 
 		/* Deinitialize accept_queue to trap illegal accesses. */
 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index cc23f1ce239c28ac9c12ef1a5ef5316407e2bee9..8cd3224d913e0ca5cc0c227e813b5f279fed5545 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	flow.daddr = iph->saddr;
 	flow.saddr = rpfilter_get_saddr(iph->daddr);
 	flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
-	flow.flowi4_tos = RT_TOS(iph->tos);
+	flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
 	flow.flowi4_scope = RT_SCOPE_UNIVERSE;
 	flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2267d21c73a6dede5e15fd45c207a9b4adbaeaff..856ae516ac18b2aee22ccf14fd0e8a5a1b1fed83 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2937,6 +2937,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	icsk->icsk_backoff = 0;
 	icsk->icsk_probes_out = 0;
+	icsk->icsk_probes_tstamp = 0;
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	icsk->icsk_rto_min = TCP_RTO_MIN;
 	icsk->icsk_delack_max = TCP_DELACK_MAX;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c7e16b0ed791fcbd864860d6216339542e286929..a7dfca0a38cd7a0cdbe0a1c6d9b17866a33ebdc1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3384,6 +3384,7 @@ static void tcp_ack_probe(struct sock *sk)
 		return;
 	if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
 		icsk->icsk_backoff = 0;
+		icsk->icsk_probes_tstamp = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 		/* Socket must be waked up by subsequent tcp_data_snd_check().
 		 * This function is not for random using!
@@ -4396,10 +4397,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
 	 * The receiver remembers and reflects via DSACKs. Leverage the
 	 * DSACK state and change the txhash to re-route speculatively.
 	 */
-	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) {
-		sk_rethink_txhash(sk);
+	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
+	    sk_rethink_txhash(sk))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
-	}
 }
 
 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 58207c7769d05693b650e3c93e4ef405a5d4b23a..777306b5bc224d6e36da13641b5cdf4f0dba81fd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1595,6 +1595,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 		tcp_move_syn(newtp, req);
 		ireq->ireq_opt = NULL;
 	} else {
+		newinet->inet_opt = NULL;
+
 		if (!req_unhash && found_dup_sk) {
 			/* This code path should only be executed in the
 			 * syncookie case only
@@ -1602,8 +1604,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 			bh_unlock_sock(newsk);
 			sock_put(newsk);
 			newsk = NULL;
-		} else {
-			newinet->inet_opt = NULL;
 		}
 	}
 	return newsk;
@@ -1760,6 +1760,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
+	u32 tail_gso_size, tail_gso_segs;
 	struct skb_shared_info *shinfo;
 	const struct tcphdr *th;
 	struct tcphdr *thtail;
@@ -1767,6 +1768,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	unsigned int hdrlen;
 	bool fragstolen;
 	u32 gso_segs;
+	u32 gso_size;
 	int delta;
 
 	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -1792,13 +1794,6 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	 */
 	th = (const struct tcphdr *)skb->data;
 	hdrlen = th->doff * 4;
-	shinfo = skb_shinfo(skb);
-
-	if (!shinfo->gso_size)
-		shinfo->gso_size = skb->len - hdrlen;
-
-	if (!shinfo->gso_segs)
-		shinfo->gso_segs = 1;
 
 	tail = sk->sk_backlog.tail;
 	if (!tail)
@@ -1821,6 +1816,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 		goto no_coalesce;
 
 	__skb_pull(skb, hdrlen);
+
+	shinfo = skb_shinfo(skb);
+	gso_size = shinfo->gso_size ?: skb->len;
+	gso_segs = shinfo->gso_segs ?: 1;
+
+	shinfo = skb_shinfo(tail);
+	tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen);
+	tail_gso_segs = shinfo->gso_segs ?: 1;
+
 	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
 		TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
 
@@ -1847,11 +1851,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 		}
 
 		/* Not as strict as GRO. We only need to carry mss max value */
-		skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
-						 skb_shinfo(tail)->gso_size);
-
-		gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
-		skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+		shinfo->gso_size = max(gso_size, tail_gso_size);
+		shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF);
 
 		sk->sk_backlog.len += delta;
 		__NET_INC_STATS(sock_net(sk),
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 899d053cb10ed305d866f233d8fd133b8b62b379..b0a6b0b30a6bf02183eb3a20a54abe552634c09c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -4084,6 +4084,7 @@ void tcp_send_probe0(struct sock *sk)
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;
+		icsk->icsk_probes_tstamp = 0;
 		return;
 	}
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6c62b9ea1320d9bbd26ed86b9f41de02fee6c491..faa92948441ba28a821034f18a6e4cc252da2c99 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -219,14 +219,8 @@ static int tcp_write_timeout(struct sock *sk)
 	int retry_until;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
-		if (icsk->icsk_retransmits) {
-			dst_negative_advice(sk);
-		} else {
-			sk_rethink_txhash(sk);
-			tp->timeout_rehash++;
-			__NET_INC_STATS(sock_net(sk),
-					LINUX_MIB_TCPTIMEOUTREHASH);
-		}
+		if (icsk->icsk_retransmits)
+			__dst_negative_advice(sk);
 		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
 		expired = icsk->icsk_retransmits >= retry_until;
 	} else {
@@ -234,12 +228,7 @@ static int tcp_write_timeout(struct sock *sk)
 			/* Black hole detection */
 			tcp_mtu_probing(icsk, sk);
 
-			dst_negative_advice(sk);
-		} else {
-			sk_rethink_txhash(sk);
-			tp->timeout_rehash++;
-			__NET_INC_STATS(sock_net(sk),
-					LINUX_MIB_TCPTIMEOUTREHASH);
+			__dst_negative_advice(sk);
 		}
 
 		retry_until = net->ipv4.sysctl_tcp_retries2;
@@ -270,6 +259,11 @@ static int tcp_write_timeout(struct sock *sk)
 		return 1;
 	}
 
+	if (sk_rethink_txhash(sk)) {
+		tp->timeout_rehash++;
+		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
+	}
+
 	return 0;
 }
 
@@ -349,6 +343,7 @@ static void tcp_probe_timer(struct sock *sk)
 
 	if (tp->packets_out || !skb) {
 		icsk->icsk_probes_out = 0;
+		icsk->icsk_probes_tstamp = 0;
 		return;
 	}
 
@@ -360,13 +355,12 @@ static void tcp_probe_timer(struct sock *sk)
 	 * corresponding system limit. We also implement similar policy when
 	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	if (icsk->icsk_user_timeout) {
-		u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
-						tcp_probe0_base(sk));
-
-		if (elapsed >= icsk->icsk_user_timeout)
-			goto abort;
-	}
+	if (!icsk->icsk_probes_tstamp)
+		icsk->icsk_probes_tstamp = tcp_jiffies32;
+	else if (icsk->icsk_user_timeout &&
+		 (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+		 msecs_to_jiffies(icsk->icsk_user_timeout))
+		goto abort;
 
 	max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
 	if (sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7103b0a89756e24203261684e88432615c344581..69ea76578abb95c613ef20f4fc0751e1ff81ac7b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2555,7 +2555,8 @@ int udp_v4_early_demux(struct sk_buff *skb)
 		 */
 		if (!inet_sk(sk)->inet_daddr && in_dev)
 			return ip_mc_validate_source(skb, iph->daddr,
-						     iph->saddr, iph->tos,
+						     iph->saddr,
+						     iph->tos & IPTOS_RT_MASK,
 						     skb->dev, in_dev, &itag);
 	}
 	return 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index eff2cacd52093962d17bb9798b21ed20d7b1a995..9edc5bb2d531aeffef448ef049ed8a745fe61544 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2467,8 +2467,9 @@ static void addrconf_add_mroute(struct net_device *dev)
 		.fc_ifindex = dev->ifindex,
 		.fc_dst_len = 8,
 		.fc_flags = RTF_UP,
-		.fc_type = RTN_UNICAST,
+		.fc_type = RTN_MULTICAST,
 		.fc_nlinfo.nl_net = dev_net(dev),
+		.fc_protocol = RTPROT_KERNEL,
 	};
 
 	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 48f144f107d536eb96d08f438ad21e5a9e3c8c1e..9e723d943421907abb6664bbdad92c9b416efde0 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -120,18 +120,17 @@ static ssize_t aqm_write(struct file *file,
 {
 	struct ieee80211_local *local = file->private_data;
 	char buf[100];
-	size_t len;
 
-	if (count > sizeof(buf))
+	if (count >= sizeof(buf))
 		return -EINVAL;
 
 	if (copy_from_user(buf, user_buf, count))
 		return -EFAULT;
 
-	buf[sizeof(buf) - 1] = '\0';
-	len = strlen(buf);
-	if (len > 0 && buf[len-1] == '\n')
-		buf[len-1] = 0;
+	if (count && buf[count - 1] == '\n')
+		buf[count - 1] = '\0';
+	else
+		buf[count] = '\0';
 
 	if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1)
 		return count;
@@ -177,18 +176,17 @@ static ssize_t airtime_flags_write(struct file *file,
 {
 	struct ieee80211_local *local = file->private_data;
 	char buf[16];
-	size_t len;
 
-	if (count > sizeof(buf))
+	if (count >= sizeof(buf))
 		return -EINVAL;
 
 	if (copy_from_user(buf, user_buf, count))
 		return -EFAULT;
 
-	buf[sizeof(buf) - 1] = 0;
-	len = strlen(buf);
-	if (len > 0 && buf[len - 1] == '\n')
-		buf[len - 1] = 0;
+	if (count && buf[count - 1] == '\n')
+		buf[count - 1] = '\0';
+	else
+		buf[count] = '\0';
 
 	if (kstrtou16(buf, 0, &local->airtime_flags))
 		return -EINVAL;
@@ -237,20 +235,19 @@ static ssize_t aql_txq_limit_write(struct file *file,
 {
 	struct ieee80211_local *local = file->private_data;
 	char buf[100];
-	size_t len;
 	u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old;
 	struct sta_info *sta;
 
-	if (count > sizeof(buf))
+	if (count >= sizeof(buf))
 		return -EINVAL;
 
 	if (copy_from_user(buf, user_buf, count))
 		return -EFAULT;
 
-	buf[sizeof(buf) - 1] = 0;
-	len = strlen(buf);
-	if (len > 0 && buf[len - 1] == '\n')
-		buf[len - 1] = 0;
+	if (count && buf[count - 1] == '\n')
+		buf[count - 1] = '\0';
+	else
+		buf[count] = '\0';
 
 	if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
 		return -EINVAL;
@@ -306,18 +303,17 @@ static ssize_t force_tx_status_write(struct file *file,
 {
 	struct ieee80211_local *local = file->private_data;
 	char buf[3];
-	size_t len;
 
-	if (count > sizeof(buf))
+	if (count >= sizeof(buf))
 		return -EINVAL;
 
 	if (copy_from_user(buf, user_buf, count))
 		return -EFAULT;
 
-	buf[sizeof(buf) - 1] = '\0';
-	len = strlen(buf);
-	if (len > 0 && buf[len - 1] == '\n')
-		buf[len - 1] = 0;
+	if (count && buf[count - 1] == '\n')
+		buf[count - 1] = '\0';
+	else
+		buf[count] = '\0';
 
 	if (buf[0] == '0' && buf[1] == '\0')
 		local->force_tx_status = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 13b9bcc4865deb38644d15bbe0cdc4ee28f5bdd2..972895e9f22dc5e4b977087bba880f9a40fcdcdb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4176,6 +4176,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 
 	rcu_read_lock();
 	key = rcu_dereference(sta->ptk[sta->ptk_idx]);
+	if (!key)
+		key = rcu_dereference(sdata->default_unicast_key);
 	if (key) {
 		switch (key->conf.cipher) {
 		case WLAN_CIPHER_SUITE_TKIP:
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6422da6690f795890286113c3e6a46fbe271d4d7..ebb3228ce9718fd52bb2e481874508397fea47d0 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -649,7 +649,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 		if (!skip_hw && tx->key &&
 		    tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
 			info->control.hw_key = &tx->key->conf;
-	} else if (!ieee80211_is_mgmt(hdr->frame_control) && tx->sta &&
+	} else if (ieee80211_is_data_present(hdr->frame_control) && tx->sta &&
 		   test_sta_flag(tx->sta, WLAN_STA_USES_ENCRYPTION)) {
 		return TX_DROP;
 	}
@@ -3809,7 +3809,7 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
 		 * get immediately moved to the back of the list on the next
 		 * call to ieee80211_next_txq().
 		 */
-		if (txqi->txq.sta &&
+		if (txqi->txq.sta && local->airtime_flags &&
 		    wiphy_ext_feature_isset(local->hw.wiphy,
 					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
 			list_add(&txqi->schedule_order,
@@ -4251,7 +4251,6 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
 	struct ethhdr *ehdr = (struct ethhdr *)skb->data;
 	struct ieee80211_key *key;
 	struct sta_info *sta;
-	bool offload = true;
 
 	if (unlikely(skb->len < ETH_HLEN)) {
 		kfree_skb(skb);
@@ -4267,18 +4266,22 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
 
 	if (unlikely(IS_ERR_OR_NULL(sta) || !sta->uploaded ||
 	    !test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
-		sdata->control_port_protocol == ehdr->h_proto))
-		offload = false;
-	else if ((key = rcu_dereference(sta->ptk[sta->ptk_idx])) &&
-		 (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) ||
-		  key->conf.cipher == WLAN_CIPHER_SUITE_TKIP))
-		offload = false;
-
-	if (offload)
-		ieee80211_8023_xmit(sdata, dev, sta, key, skb);
-	else
-		ieee80211_subif_start_xmit(skb, dev);
+	    sdata->control_port_protocol == ehdr->h_proto))
+		goto skip_offload;
+
+	key = rcu_dereference(sta->ptk[sta->ptk_idx]);
+	if (!key)
+		key = rcu_dereference(sdata->default_unicast_key);
+
+	if (key && (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) ||
+		    key->conf.cipher == WLAN_CIPHER_SUITE_TKIP))
+		goto skip_offload;
+
+	ieee80211_8023_xmit(sdata, dev, sta, key, skb);
+	goto out;
 
+skip_offload:
+	ieee80211_subif_start_xmit(skb, dev);
 out:
 	rcu_read_unlock();
 
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 79bebf4b0796915b514f7a8283b941c6db0d17e8..59257400697d1d3f532701856a84d1c8f8539527 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -508,7 +508,7 @@ static int nci_open_device(struct nci_dev *ndev)
 		};
 		unsigned long opt = 0;
 
-		if (!(ndev->nci_ver & NCI_VER_2_MASK))
+		if (ndev->nci_ver & NCI_VER_2_MASK)
 			opt = (unsigned long)&nci_init_v2_cmd;
 
 		rc = __nci_request(ndev, nci_init_req, opt,
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1319986693fc8826330e0a7dc49143d1d7518d64..84f932532db7dc39e23d946cd73e97ba042795a4 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1272,6 +1272,10 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 
 		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
 		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+		if (!nla_ok(nla_opt_msk, msk_depth)) {
+			NL_SET_ERR_MSG(extack, "Invalid nested attribute for masks");
+			return -EINVAL;
+		}
 	}
 
 	nla_for_each_attr(nla_opt_key, nla_enc_key,
@@ -1307,9 +1311,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
 				return -EINVAL;
 			}
-
-			if (msk_depth)
-				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 			break;
 		case TCA_FLOWER_KEY_ENC_OPTS_VXLAN:
 			if (key->enc_opts.dst_opt_type) {
@@ -1340,9 +1341,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
 				return -EINVAL;
 			}
-
-			if (msk_depth)
-				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 			break;
 		case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN:
 			if (key->enc_opts.dst_opt_type) {
@@ -1373,14 +1371,20 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
 				return -EINVAL;
 			}
-
-			if (msk_depth)
-				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 			break;
 		default:
 			NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
 			return -EINVAL;
 		}
+
+		if (!msk_depth)
+			continue;
+
+		if (!nla_ok(nla_opt_msk, msk_depth)) {
+			NL_SET_ERR_MSG(extack, "A mask attribute is invalid");
+			return -EINVAL;
+		}
+		nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 	}
 
 	return 0;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 78bec347b8b66f660e620dd715d0eb68f9bcd2d3..c4007b9cd16d6a200d943e3e0536d6b20022ba77 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -366,9 +366,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	if (tb[TCA_TCINDEX_MASK])
 		cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
 
-	if (tb[TCA_TCINDEX_SHIFT])
+	if (tb[TCA_TCINDEX_SHIFT]) {
 		cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
+		if (cp->shift > 16) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
 	if (!cp->hash) {
 		/* Hash not specified, use perfect hash if the upper limit
 		 * of the hashing index is below the threshold.
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 51cb553e4317a3e2bca1996e0df004aab8111d58..6fe4e5cc807c90b046a16f014df43bfe841cbc43 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -412,7 +412,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 {
 	struct qdisc_rate_table *rtab;
 
-	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+	if (tab == NULL || r->rate == 0 ||
+	    r->cell_log == 0 || r->cell_log >= 32 ||
 	    nla_len(tab) != TC_RTAB_SIZE) {
 		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
 		return NULL;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5fb9164aa69059f77158a9f6ec796381474641ae..dcc50ae545506f39d04908f6168c36ddda350f6f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -857,6 +857,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	err = -EAGAIN;
 	if (len <= 0)
 		goto out_release;
+	trace_svc_xdr_recvfrom(&rqstp->rq_arg);
 
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
@@ -866,7 +867,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 
 	if (serv->sv_stats)
 		serv->sv_stats->netcnt++;
-	trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg);
 	return len;
 out_release:
 	rqstp->rq_res.len = 0;
@@ -904,7 +904,7 @@ int svc_send(struct svc_rqst *rqstp)
 	xb->len = xb->head[0].iov_len +
 		xb->page_len +
 		xb->tail[0].iov_len;
-	trace_svc_xdr_sendto(rqstp, xb);
+	trace_svc_xdr_sendto(rqstp->rq_xid, xb);
 	trace_svc_stats_latency(rqstp);
 
 	len = xprt->xpt_ops->xpo_sendto(rqstp);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bb72447ad960271ab1aaec43867202763f8bc6f6..8114bba8556c72a1ed173cb7c29232ed9dbdf7a0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,7 +5,7 @@
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright      2017  Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -139,6 +139,11 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 	return rcu_dereference_rtnl(cfg80211_regdomain);
 }
 
+/*
+ * Returns the regulatory domain associated with the wiphy.
+ *
+ * Requires either RTNL or RCU protection
+ */
 const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy)
 {
 	return rcu_dereference_rtnl(wiphy->regd);
@@ -2571,9 +2576,13 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
 	if (IS_ERR(new_regd))
 		return;
 
+	rtnl_lock();
+
 	tmp = get_wiphy_regdom(wiphy);
 	rcu_assign_pointer(wiphy->regd, new_regd);
 	rcu_free_regdom(tmp);
+
+	rtnl_unlock();
 }
 EXPORT_SYMBOL(wiphy_apply_custom_regulatory);
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 8037b04a9edd11cad8845d95a06b2d442c29f8f5..4a83117507f5a867412e350b27577a7832e16c22 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -108,9 +108,9 @@ EXPORT_SYMBOL(xsk_get_pool_from_qid);
 
 void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
 {
-	if (queue_id < dev->real_num_rx_queues)
+	if (queue_id < dev->num_rx_queues)
 		dev->_rx[queue_id].pool = NULL;
-	if (queue_id < dev->real_num_tx_queues)
+	if (queue_id < dev->num_tx_queues)
 		dev->_tx[queue_id].pool = NULL;
 }
 
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 7d8026f3f377256292b5b7bd133d865b37f4b5a3..a0cd28cd31a852c50ca7f2500f64d91f6b60f53b 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -275,7 +275,9 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 		struct inode *inode;
 
 		audit_log_format(ab, " name=");
+		spin_lock(&a->u.dentry->d_lock);
 		audit_log_untrustedstring(ab, a->u.dentry->d_name.name);
+		spin_unlock(&a->u.dentry->d_lock);
 
 		inode = d_backing_inode(a->u.dentry);
 		if (inode) {
@@ -293,8 +295,9 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 		dentry = d_find_alias(inode);
 		if (dentry) {
 			audit_log_format(ab, " name=");
-			audit_log_untrustedstring(ab,
-					 dentry->d_name.name);
+			spin_lock(&dentry->d_lock);
+			audit_log_untrustedstring(ab, dentry->d_name.name);
+			spin_unlock(&dentry->d_lock);
 			dput(dentry);
 		}
 		audit_log_format(ab, " dev=");
diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh
index 595e164dc352f08d97125f04e49efd89e9281811..feb30c2c788159008c6caa00fe11b48a27b8a9b3 100755
--- a/tools/bootconfig/scripts/bconf2ftrace.sh
+++ b/tools/bootconfig/scripts/bconf2ftrace.sh
@@ -152,6 +152,7 @@ setup_instance() { # [instance]
 	set_array_of ${instance}.options ${instancedir}/trace_options
 	set_value_of ${instance}.trace_clock ${instancedir}/trace_clock
 	set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask
+	set_value_of ${instance}.tracing_on ${instancedir}/tracing_on
 	set_value_of ${instance}.tracer ${instancedir}/current_tracer
 	set_array_of ${instance}.ftrace.filters \
 		${instancedir}/set_ftrace_filter
diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh
index 6c0d4b61e0c260a9e213453b76e887282a1edde1..a0c3bcc6da4f36c24bc29efa4ec8a718d213e3c1 100755
--- a/tools/bootconfig/scripts/ftrace2bconf.sh
+++ b/tools/bootconfig/scripts/ftrace2bconf.sh
@@ -221,6 +221,10 @@ instance_options() { # [instance-name]
 	if [ `echo $val | sed -e s/f//g`x != x ]; then
 		emit_kv $PREFIX.cpumask = $val
 	fi
+	val=`cat $INSTANCE/tracing_on`
+	if [ `echo $val | sed -e s/f//g`x != x ]; then
+		emit_kv $PREFIX.tracing_on = $val
+	fi
 
 	val=
 	for i in `cat $INSTANCE/set_event`; do
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
index ce365d212768211b8812054bae5399f464d67acb..cc7070c7439ba6a5327d992a6cebf4fdc591f7ab 100644
--- a/tools/include/linux/build_bug.h
+++ b/tools/include/linux/build_bug.h
@@ -79,9 +79,4 @@
 #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
 #endif // static_assert
 
-#ifdef __GENKSYMS__
-/* genksyms gets confused by _Static_assert */
-#define _Static_assert(expr, ...)
-#endif
-
 #endif	/* _LINUX_BUILD_BUG_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 886802b8ffba3617f64b47f7f6f452335fdfa23e..374c67875cdbd5c60eb15c1b932e7fcdc7f9fa7d 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -251,6 +251,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
 #define KVM_EXIT_DIRTY_RING_FULL  31
+#define KVM_EXIT_AP_RESET_HOLD    32
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -573,6 +574,7 @@ struct kvm_vapic_addr {
 #define KVM_MP_STATE_CHECK_STOP        6
 #define KVM_MP_STATE_OPERATING         7
 #define KVM_MP_STATE_LOAD              8
+#define KVM_MP_STATE_AP_RESET_HOLD     9
 
 struct kvm_mp_state {
 	__u32 mp_state;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 3c3f2bc6c6528e08fdaa913f1da43e2c02799b48..9970a288dda536768b5878f5f0a2c71dc0af8a97 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -240,11 +240,6 @@ static int btf_parse_hdr(struct btf *btf)
 	}
 
 	meta_left = btf->raw_size - sizeof(*hdr);
-	if (!meta_left) {
-		pr_debug("BTF has no data\n");
-		return -EINVAL;
-	}
-
 	if (meta_left < hdr->str_off + hdr->str_len) {
 		pr_debug("Invalid BTF total size:%u\n", btf->raw_size);
 		return -EINVAL;
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index c8d45091e7c26a3bede00f38dc72b03f83d435b4..c70e9e03af3e9929813017c48df7f063f9f701e4 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -27,5 +27,5 @@ int main(int argc, char **argv)
 	perf_cpu_map__put(cpus);
 
 	__T_END;
-	return 0;
+	return tests_failed == 0 ? 0 : -1;
 }
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index 6d8ebe0c25042f7c6902744967c0cb7074db62c8..e2ac0b7f432eaa87aeab1b8952ca27d675ae962a 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -208,13 +208,13 @@ static int test_mmap_thread(void)
 	char path[PATH_MAX];
 	int id, err, pid, go_pipe[2];
 	union perf_event *event;
-	char bf;
 	int count = 0;
 
 	snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
 		 sysfs__mountpoint());
 
 	if (filename__read_int(path, &id)) {
+		tests_failed++;
 		fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
 		return -1;
 	}
@@ -229,6 +229,7 @@ static int test_mmap_thread(void)
 	pid = fork();
 	if (!pid) {
 		int i;
+		char bf;
 
 		read(go_pipe[0], &bf, 1);
 
@@ -266,7 +267,7 @@ static int test_mmap_thread(void)
 	perf_evlist__enable(evlist);
 
 	/* kick the child and wait for it to finish */
-	write(go_pipe[1], &bf, 1);
+	write(go_pipe[1], "A", 1);
 	waitpid(pid, NULL, 0);
 
 	/*
@@ -409,5 +410,5 @@ int main(int argc, char **argv)
 	test_mmap_cpus();
 
 	__T_END;
-	return 0;
+	return tests_failed == 0 ? 0 : -1;
 }
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index 135722ac965bf7dcd75a3d50dc2bc9250cfd89a2..0ad82d7a2a51b690e57d2d28f16531fe7df40609 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -131,5 +131,5 @@ int main(int argc, char **argv)
 	test_stat_thread_enable();
 
 	__T_END;
-	return 0;
+	return tests_failed == 0 ? 0 : -1;
 }
diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c
index 7dc4d6fbeddee5bdc4b0f0316f8b6a1814fa2c6b..384471441b4842e8cd472d2750e267dcbb05ec3f 100644
--- a/tools/lib/perf/tests/test-threadmap.c
+++ b/tools/lib/perf/tests/test-threadmap.c
@@ -27,5 +27,5 @@ int main(int argc, char **argv)
 	perf_thread_map__put(threads);
 
 	__T_END;
-	return 0;
+	return tests_failed == 0 ? 0 : -1;
 }
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
index 65c4ff6892d967ab02459ba14f258d7080d6e078..e6b6181c6dc63b29abf7463c7bee52c048e5e112 100644
--- a/tools/perf/examples/bpf/5sec.c
+++ b/tools/perf/examples/bpf/5sec.c
@@ -39,7 +39,7 @@
    Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
 */
 
-#include <bpf/bpf.h>
+#include <bpf.h>
 
 #define NSEC_PER_SEC	1000000000L
 
diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh
index 249dfe48cf6ad490d1f3066e137d7f843cce00ab..ebebd3596cf99379d92600bcdae786e9c45e3694 100755
--- a/tools/perf/tests/shell/stat+shadow_stat.sh
+++ b/tools/perf/tests/shell/stat+shadow_stat.sh
@@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2
 
 test_global_aggr()
 {
-	local cyc
-
 	perf stat -a --no-big-num -e cycles,instructions sleep 1  2>&1 | \
 	grep -e cycles -e instructions | \
 	while read num evt hash ipc rest
 	do
 		# skip not counted events
-		if [[ $num == "<not" ]]; then
+		if [ "$num" = "<not" ]; then
 			continue
 		fi
 
 		# save cycles count
-		if [[ $evt == "cycles" ]]; then
+		if [ "$evt" = "cycles" ]; then
 			cyc=$num
 			continue
 		fi
 
 		# skip if no cycles
-		if [[ -z $cyc ]]; then
+		if [ -z "$cyc" ]; then
 			continue
 		fi
 
 		# use printf for rounding and a leading zero
-		local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
-		if [[ $ipc != $res ]]; then
+		res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+		if [ "$ipc" != "$res" ]; then
 			echo "IPC is different: $res != $ipc  ($num / $cyc)"
 			exit 1
 		fi
@@ -42,32 +40,32 @@ test_global_aggr()
 
 test_no_aggr()
 {
-	declare -A results
-
 	perf stat -a -A --no-big-num -e cycles,instructions sleep 1  2>&1 | \
 	grep ^CPU | \
 	while read cpu num evt hash ipc rest
 	do
 		# skip not counted events
-		if [[ $num == "<not" ]]; then
+		if [ "$num" = "<not" ]; then
 			continue
 		fi
 
 		# save cycles count
-		if [[ $evt == "cycles" ]]; then
-			results[$cpu]=$num
+		if [ "$evt" = "cycles" ]; then
+			results="$results $cpu:$num"
 			continue
 		fi
 
+		cyc=${results##* $cpu:}
+		cyc=${cyc%% *}
+
 		# skip if no cycles
-		local cyc=${results[$cpu]}
-		if [[ -z $cyc ]]; then
+		if [ -z "$cyc" ]; then
 			continue
 		fi
 
 		# use printf for rounding and a leading zero
-		local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
-		if [[ $ipc != $res ]]; then
+		res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+		if [ "$ipc" != "$res" ]; then
 			echo "IPC is different for $cpu: $res != $ipc  ($num / $cyc)"
 			exit 1
 		fi
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 062383e225a3e9e64f229b739d2c155b5430e28d..c4ed3dc2c8f405323854e81225b2e1b32bbc7f7d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session,
 	attr_offset = lseek(ff.fd, 0, SEEK_CUR);
 
 	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
+			/*
+			 * We are likely in "perf inject" and have read
+			 * from an older file. Update attr size so that
+			 * reader gets the right offset to the ids.
+			 */
+			evsel->core.attr.size = sizeof(evsel->core.attr);
+		}
 		f_attr = (struct perf_file_attr){
 			.attr = evsel->core.attr,
 			.ids  = {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index f841f3503cae61de55ce1b8dbe912b7b0f8e2f53..1e9d3f982b47742f8c5c043c131945c2b18151cd 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2980,7 +2980,7 @@ int machines__for_each_thread(struct machines *machines,
 
 pid_t machine__get_current_tid(struct machine *machine, int cpu)
 {
-	int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
+	int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS);
 
 	if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid)
 		return -1;
@@ -2992,7 +2992,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
 			     pid_t tid)
 {
 	struct thread *thread;
-	int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
+	int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS);
 
 	if (cpu < 0)
 		return -EINVAL;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 50ff9795a4f112acb8f73e6750fef60f26d6f48a..25adbcce02814145650819cf9cbf76284c3e434d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2404,7 +2404,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
 {
 	int i, err = -1;
 	struct perf_cpu_map *map;
-	int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS);
+	int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
 
 	for (i = 0; i < PERF_TYPE_MAX; ++i) {
 		struct evsel *evsel;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 901265127e3698126bc9f395058ee4f12fd05419..12eafd12a693c077178be44b5b60620dee0d03cc 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -8,6 +8,7 @@
 #include "evlist.h"
 #include "expr.h"
 #include "metricgroup.h"
+#include "cgroup.h"
 #include <linux/zalloc.h>
 
 /*
@@ -28,6 +29,7 @@ struct saved_value {
 	enum stat_type type;
 	int ctx;
 	int cpu;
+	struct cgroup *cgrp;
 	struct runtime_stat *stat;
 	struct stats stats;
 	u64 metric_total;
@@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
 	if (a->ctx != b->ctx)
 		return a->ctx - b->ctx;
 
+	if (a->cgrp != b->cgrp)
+		return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
+
 	if (a->evsel == NULL && b->evsel == NULL) {
 		if (a->stat == b->stat)
 			return 0;
@@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
 					      bool create,
 					      enum stat_type type,
 					      int ctx,
-					      struct runtime_stat *st)
+					      struct runtime_stat *st,
+					      struct cgroup *cgrp)
 {
 	struct rblist *rblist;
 	struct rb_node *nd;
@@ -110,10 +116,15 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
 		.type = type,
 		.ctx = ctx,
 		.stat = st,
+		.cgrp = cgrp,
 	};
 
 	rblist = &st->value_list;
 
+	/* don't use context info for clock events */
+	if (type == STAT_NSECS)
+		dm.ctx = 0;
+
 	nd = rblist__find(rblist, &dm);
 	if (nd)
 		return container_of(nd, struct saved_value, rb_node);
@@ -191,12 +202,18 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
 	reset_stat(st);
 }
 
+struct runtime_stat_data {
+	int ctx;
+	struct cgroup *cgrp;
+};
+
 static void update_runtime_stat(struct runtime_stat *st,
 				enum stat_type type,
-				int ctx, int cpu, u64 count)
+				int cpu, u64 count,
+				struct runtime_stat_data *rsd)
 {
-	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
-						   type, ctx, st);
+	struct saved_value *v = saved_value_lookup(NULL, cpu, true, type,
+						   rsd->ctx, st, rsd->cgrp);
 
 	if (v)
 		update_stats(&v->stats, count);
@@ -210,82 +227,86 @@ static void update_runtime_stat(struct runtime_stat *st,
 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
 				    int cpu, struct runtime_stat *st)
 {
-	int ctx = evsel_context(counter);
 	u64 count_ns = count;
 	struct saved_value *v;
+	struct runtime_stat_data rsd = {
+		.ctx = evsel_context(counter),
+		.cgrp = counter->cgrp,
+	};
 
 	count *= counter->scale;
 
 	if (evsel__is_clock(counter))
-		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
+		update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
+		update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
+		update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
-		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
+		update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, ELISION_START))
-		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
+		update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
 		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
 		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
 		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
 		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
 		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
 		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
 		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
 		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
 		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
-				    ctx, cpu, count);
+				    cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
+		update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd);
 	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
+		update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
+		update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
+		update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
+		update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
+		update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd);
 	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
+		update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, SMI_NUM))
-		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
+		update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd);
 	else if (perf_stat_evsel__is(counter, APERF))
-		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
+		update_runtime_stat(st, STAT_APERF, cpu, count, &rsd);
 
 	if (counter->collect_stat) {
-		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
+		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st,
+				       rsd.cgrp);
 		update_stats(&v->stats, count);
 		if (counter->metric_leader)
 			v->metric_total += count;
 	} else if (counter->metric_leader) {
 		v = saved_value_lookup(counter->metric_leader,
-				       cpu, true, STAT_NONE, 0, st);
+				       cpu, true, STAT_NONE, 0, st, rsd.cgrp);
 		v->metric_total += count;
 		v->metric_other++;
 	}
@@ -422,11 +443,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 }
 
 static double runtime_stat_avg(struct runtime_stat *st,
-			       enum stat_type type, int ctx, int cpu)
+			       enum stat_type type, int cpu,
+			       struct runtime_stat_data *rsd)
 {
 	struct saved_value *v;
 
-	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+	v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
 	if (!v)
 		return 0.0;
 
@@ -434,11 +456,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
 }
 
 static double runtime_stat_n(struct runtime_stat *st,
-			     enum stat_type type, int ctx, int cpu)
+			     enum stat_type type, int cpu,
+			     struct runtime_stat_data *rsd)
 {
 	struct saved_value *v;
 
-	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+	v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
 	if (!v)
 		return 0.0;
 
@@ -446,16 +469,15 @@ static double runtime_stat_n(struct runtime_stat *st,
 }
 
 static void print_stalled_cycles_frontend(struct perf_stat_config *config,
-					  int cpu,
-					  struct evsel *evsel, double avg,
+					  int cpu, double avg,
 					  struct perf_stat_output_ctx *out,
-					  struct runtime_stat *st)
+					  struct runtime_stat *st,
+					  struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -470,16 +492,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
 }
 
 static void print_stalled_cycles_backend(struct perf_stat_config *config,
-					 int cpu,
-					 struct evsel *evsel, double avg,
+					 int cpu, double avg,
 					 struct perf_stat_output_ctx *out,
-					 struct runtime_stat *st)
+					 struct runtime_stat *st,
+					 struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -490,17 +511,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
 }
 
 static void print_branch_misses(struct perf_stat_config *config,
-				int cpu,
-				struct evsel *evsel,
-				double avg,
+				int cpu, double avg,
 				struct perf_stat_output_ctx *out,
-				struct runtime_stat *st)
+				struct runtime_stat *st,
+				struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -511,18 +530,15 @@ static void print_branch_misses(struct perf_stat_config *config,
 }
 
 static void print_l1_dcache_misses(struct perf_stat_config *config,
-				   int cpu,
-				   struct evsel *evsel,
-				   double avg,
+				   int cpu, double avg,
 				   struct perf_stat_output_ctx *out,
-				   struct runtime_stat *st)
-
+				   struct runtime_stat *st,
+				   struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -533,18 +549,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
 }
 
 static void print_l1_icache_misses(struct perf_stat_config *config,
-				   int cpu,
-				   struct evsel *evsel,
-				   double avg,
+				   int cpu, double avg,
 				   struct perf_stat_output_ctx *out,
-				   struct runtime_stat *st)
-
+				   struct runtime_stat *st,
+				   struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -554,17 +567,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
 }
 
 static void print_dtlb_cache_misses(struct perf_stat_config *config,
-				    int cpu,
-				    struct evsel *evsel,
-				    double avg,
+				    int cpu, double avg,
 				    struct perf_stat_output_ctx *out,
-				    struct runtime_stat *st)
+				    struct runtime_stat *st,
+				    struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -574,17 +585,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
 }
 
 static void print_itlb_cache_misses(struct perf_stat_config *config,
-				    int cpu,
-				    struct evsel *evsel,
-				    double avg,
+				    int cpu, double avg,
 				    struct perf_stat_output_ctx *out,
-				    struct runtime_stat *st)
+				    struct runtime_stat *st,
+				    struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -594,17 +603,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
 }
 
 static void print_ll_cache_misses(struct perf_stat_config *config,
-				  int cpu,
-				  struct evsel *evsel,
-				  double avg,
+				  int cpu, double avg,
 				  struct perf_stat_output_ctx *out,
-				  struct runtime_stat *st)
+				  struct runtime_stat *st,
+				  struct runtime_stat_data *rsd)
 {
 	double total, ratio = 0.0;
 	const char *color;
-	int ctx = evsel_context(evsel);
 
-	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -662,56 +669,61 @@ static double sanitize_val(double x)
 	return x;
 }
 
-static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
+static double td_total_slots(int cpu, struct runtime_stat *st,
+			     struct runtime_stat_data *rsd)
 {
-	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
+	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd);
 }
 
-static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
+static double td_bad_spec(int cpu, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
 {
 	double bad_spec = 0;
 	double total_slots;
 	double total;
 
-	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
-		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
-		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
+	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) -
+		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) +
+		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd);
 
-	total_slots = td_total_slots(ctx, cpu, st);
+	total_slots = td_total_slots(cpu, st, rsd);
 	if (total_slots)
 		bad_spec = total / total_slots;
 	return sanitize_val(bad_spec);
 }
 
-static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
+static double td_retiring(int cpu, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
 {
 	double retiring = 0;
-	double total_slots = td_total_slots(ctx, cpu, st);
+	double total_slots = td_total_slots(cpu, st, rsd);
 	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
-					    ctx, cpu);
+					    cpu, rsd);
 
 	if (total_slots)
 		retiring = ret_slots / total_slots;
 	return retiring;
 }
 
-static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
+static double td_fe_bound(int cpu, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
 {
 	double fe_bound = 0;
-	double total_slots = td_total_slots(ctx, cpu, st);
+	double total_slots = td_total_slots(cpu, st, rsd);
 	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
-					    ctx, cpu);
+					    cpu, rsd);
 
 	if (total_slots)
 		fe_bound = fetch_bub / total_slots;
 	return fe_bound;
 }
 
-static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
+static double td_be_bound(int cpu, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
 {
-	double sum = (td_fe_bound(ctx, cpu, st) +
-		      td_bad_spec(ctx, cpu, st) +
-		      td_retiring(ctx, cpu, st));
+	double sum = (td_fe_bound(cpu, st, rsd) +
+		      td_bad_spec(cpu, st, rsd) +
+		      td_retiring(cpu, st, rsd));
 	if (sum == 0)
 		return 0;
 	return sanitize_val(1.0 - sum);
@@ -722,15 +734,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
  * the ratios we need to recreate the sum.
  */
 
-static double td_metric_ratio(int ctx, int cpu,
-			      enum stat_type type,
-			      struct runtime_stat *stat)
+static double td_metric_ratio(int cpu, enum stat_type type,
+			      struct runtime_stat *stat,
+			      struct runtime_stat_data *rsd)
 {
-	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
-		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
-		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
-		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
-	double d = runtime_stat_avg(stat, type, ctx, cpu);
+	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd);
+	double d = runtime_stat_avg(stat, type, cpu, rsd);
 
 	if (sum)
 		return d / sum;
@@ -742,34 +754,33 @@ static double td_metric_ratio(int ctx, int cpu,
  * We allow two missing.
  */
 
-static bool full_td(int ctx, int cpu,
-		    struct runtime_stat *stat)
+static bool full_td(int cpu, struct runtime_stat *stat,
+		    struct runtime_stat_data *rsd)
 {
 	int c = 0;
 
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0)
 		c++;
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0)
 		c++;
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0)
 		c++;
-	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0)
 		c++;
 	return c >= 2;
 }
 
-static void print_smi_cost(struct perf_stat_config *config,
-			   int cpu, struct evsel *evsel,
+static void print_smi_cost(struct perf_stat_config *config, int cpu,
 			   struct perf_stat_output_ctx *out,
-			   struct runtime_stat *st)
+			   struct runtime_stat *st,
+			   struct runtime_stat_data *rsd)
 {
 	double smi_num, aperf, cycles, cost = 0.0;
-	int ctx = evsel_context(evsel);
 	const char *color = NULL;
 
-	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
-	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
-	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd);
+	aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd);
+	cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
 
 	if ((cycles == 0) || (aperf == 0))
 		return;
@@ -804,7 +815,8 @@ static int prepare_metric(struct evsel **metric_events,
 			scale = 1e-9;
 		} else {
 			v = saved_value_lookup(metric_events[i], cpu, false,
-					       STAT_NONE, 0, st);
+					       STAT_NONE, 0, st,
+					       metric_events[i]->cgrp);
 			if (!v)
 				break;
 			stats = &v->stats;
@@ -930,12 +942,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 	print_metric_t print_metric = out->print_metric;
 	double total, ratio = 0.0, total2;
 	const char *color = NULL;
-	int ctx = evsel_context(evsel);
+	struct runtime_stat_data rsd = {
+		.ctx = evsel_context(evsel),
+		.cgrp = evsel->cgrp,
+	};
 	struct metric_event *me;
 	int num = 1;
 
 	if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+		total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
 
 		if (total) {
 			ratio = avg / total;
@@ -945,12 +960,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
 		}
 
-		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
-					 ctx, cpu);
+		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd);
 
 		total = max(total, runtime_stat_avg(st,
 						    STAT_STALLED_CYCLES_BACK,
-						    ctx, cpu));
+						    cpu, &rsd));
 
 		if (total && avg) {
 			out->new_line(config, ctxp);
@@ -960,8 +974,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					ratio);
 		}
 	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
-		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
-			print_branch_misses(config, cpu, evsel, avg, out, st);
+		if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0)
+			print_branch_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
 	} else if (
@@ -970,8 +984,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
-		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
-			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
+		if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0)
+			print_l1_dcache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
 	} else if (
@@ -980,8 +994,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
-		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
-			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
+		if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0)
+			print_l1_icache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
 	} else if (
@@ -990,8 +1004,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
-		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
-			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
+		if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0)
+			print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
 	} else if (
@@ -1000,8 +1014,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
-		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
-			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
+		if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0)
+			print_itlb_cache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
 	} else if (
@@ -1010,27 +1024,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
-		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
-			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
+		if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0)
+			print_ll_cache_misses(config, cpu, avg, out, st, &rsd);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
 	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
-		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
+		total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd);
 
 		if (total)
 			ratio = avg * 100 / total;
 
-		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
+		if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0)
 			print_metric(config, ctxp, NULL, "%8.3f %%",
 				     "of all cache refs", ratio);
 		else
 			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
 	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
+		print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd);
 	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
+		print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd);
 	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+		total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
 
 		if (total) {
 			ratio = avg / total;
@@ -1039,7 +1053,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
-		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+		total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
 
 		if (total)
 			print_metric(config, ctxp, NULL,
@@ -1049,8 +1063,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
 				     0);
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
-		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
-		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
+		total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
 
 		if (total2 < avg)
 			total2 = avg;
@@ -1060,21 +1074,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		else
 			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
 	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
-		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
-					 ctx, cpu);
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
 
 		if (avg)
 			ratio = total / avg;
 
-		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
+		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0)
 			print_metric(config, ctxp, NULL, "%8.0f",
 				     "cycles / transaction", ratio);
 		else
 			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
 				      0);
 	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
-		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
-					 ctx, cpu);
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
 
 		if (avg)
 			ratio = total / avg;
@@ -1087,28 +1099,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		else
 			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
-		double fe_bound = td_fe_bound(ctx, cpu, st);
+		double fe_bound = td_fe_bound(cpu, st, &rsd);
 
 		if (fe_bound > 0.2)
 			color = PERF_COLOR_RED;
 		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
-		double retiring = td_retiring(ctx, cpu, st);
+		double retiring = td_retiring(cpu, st, &rsd);
 
 		if (retiring > 0.7)
 			color = PERF_COLOR_GREEN;
 		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
-		double bad_spec = td_bad_spec(ctx, cpu, st);
+		double bad_spec = td_bad_spec(cpu, st, &rsd);
 
 		if (bad_spec > 0.1)
 			color = PERF_COLOR_RED;
 		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
 				bad_spec * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
-		double be_bound = td_be_bound(ctx, cpu, st);
+		double be_bound = td_be_bound(cpu, st, &rsd);
 		const char *name = "backend bound";
 		static int have_recovery_bubbles = -1;
 
@@ -1121,43 +1133,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 
 		if (be_bound > 0.2)
 			color = PERF_COLOR_RED;
-		if (td_total_slots(ctx, cpu, st) > 0)
+		if (td_total_slots(cpu, st, &rsd) > 0)
 			print_metric(config, ctxp, color, "%8.1f%%", name,
 					be_bound * 100.);
 		else
 			print_metric(config, ctxp, NULL, NULL, name, 0);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
-			full_td(ctx, cpu, st)) {
-		double retiring = td_metric_ratio(ctx, cpu,
-						  STAT_TOPDOWN_RETIRING, st);
-
+		   full_td(cpu, st, &rsd)) {
+		double retiring = td_metric_ratio(cpu,
+						  STAT_TOPDOWN_RETIRING, st,
+						  &rsd);
 		if (retiring > 0.7)
 			color = PERF_COLOR_GREEN;
 		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
-			full_td(ctx, cpu, st)) {
-		double fe_bound = td_metric_ratio(ctx, cpu,
-						  STAT_TOPDOWN_FE_BOUND, st);
-
+		   full_td(cpu, st, &rsd)) {
+		double fe_bound = td_metric_ratio(cpu,
+						  STAT_TOPDOWN_FE_BOUND, st,
+						  &rsd);
 		if (fe_bound > 0.2)
 			color = PERF_COLOR_RED;
 		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
-			full_td(ctx, cpu, st)) {
-		double be_bound = td_metric_ratio(ctx, cpu,
-						  STAT_TOPDOWN_BE_BOUND, st);
-
+		   full_td(cpu, st, &rsd)) {
+		double be_bound = td_metric_ratio(cpu,
+						  STAT_TOPDOWN_BE_BOUND, st,
+						  &rsd);
 		if (be_bound > 0.2)
 			color = PERF_COLOR_RED;
 		print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
 				be_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
-			full_td(ctx, cpu, st)) {
-		double bad_spec = td_metric_ratio(ctx, cpu,
-						  STAT_TOPDOWN_BAD_SPEC, st);
-
+		   full_td(cpu, st, &rsd)) {
+		double bad_spec = td_metric_ratio(cpu,
+						  STAT_TOPDOWN_BAD_SPEC, st,
+						  &rsd);
 		if (bad_spec > 0.1)
 			color = PERF_COLOR_RED;
 		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
@@ -1165,11 +1177,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 	} else if (evsel->metric_expr) {
 		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
 				evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
-	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
+	} else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
 		char unit = 'M';
 		char unit_buf[10];
 
-		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+		total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
 
 		if (total)
 			ratio = 1000.0 * avg / total;
@@ -1180,7 +1192,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
 	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
-		print_smi_cost(config, cpu, evsel, out, st);
+		print_smi_cost(config, cpu, out, st, &rsd);
 	} else {
 		num = 0;
 	}
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
index 1c5556bdd11d16e599677f661a6997cdd7f27538..0dbd594c2747cb0e82f9f27970e0905108069a3a 100644
--- a/tools/testing/selftests/arm64/fp/fpsimd-test.S
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -457,7 +457,7 @@ function barf
 	mov	x11, x1	// actual data
 	mov	x12, x2	// data size
 
-	puts	"Mistatch: PID="
+	puts	"Mismatch: PID="
 	mov	x0, x20
 	bl	putdec
 	puts	", iteration="
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index f95074c9b48b730e3055a70f121a8cd3e11783bc..9210691aa9985601995eef4b8af58da33cd993b9 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -625,7 +625,7 @@ function barf
 	mov	x11, x1	// actual data
 	mov	x12, x2	// data size
 
-	puts	"Mistatch: PID="
+	puts	"Mismatch: PID="
 	mov	x0, x20
 	bl	putdec
 	puts	", iteration="
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index c0fe73a17ed1234ae7517b452364b32e58760771..3bfcf00c0a673771498f586410b062a115bd8220 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -34,61 +34,6 @@ struct storage {
 	struct bpf_spin_lock lock;
 };
 
-/* Copies an rm binary to a temp file. dest is a mkstemp template */
-static int copy_rm(char *dest)
-{
-	int fd_in, fd_out = -1, ret = 0;
-	struct stat stat;
-	char *buf = NULL;
-
-	fd_in = open("/bin/rm", O_RDONLY);
-	if (fd_in < 0)
-		return -errno;
-
-	fd_out = mkstemp(dest);
-	if (fd_out < 0) {
-		ret = -errno;
-		goto out;
-	}
-
-	ret = fstat(fd_in, &stat);
-	if (ret == -1) {
-		ret = -errno;
-		goto out;
-	}
-
-	buf = malloc(stat.st_blksize);
-	if (!buf) {
-		ret = -errno;
-		goto out;
-	}
-
-	while (ret = read(fd_in, buf, stat.st_blksize), ret > 0) {
-		ret = write(fd_out, buf, ret);
-		if (ret < 0) {
-			ret = -errno;
-			goto out;
-
-		}
-	}
-	if (ret < 0) {
-		ret = -errno;
-		goto out;
-
-	}
-
-	/* Set executable permission on the copied file */
-	ret = chmod(dest, 0100);
-	if (ret == -1)
-		ret = -errno;
-
-out:
-	free(buf);
-	close(fd_in);
-	close(fd_out);
-	return ret;
-}
-
 /* Fork and exec the provided rm binary and return the exit code of the
  * forked process and its pid.
  */
@@ -168,9 +113,11 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
 
 void test_test_local_storage(void)
 {
-	char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX";
+	char tmp_dir_path[64] = "/tmp/local_storageXXXXXX";
 	int err, serv_sk = -1, task_fd = -1, rm_fd = -1;
 	struct local_storage *skel = NULL;
+	char tmp_exec_path[64];
+	char cmd[256];
 
 	skel = local_storage__open_and_load();
 	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -189,18 +136,24 @@ void test_test_local_storage(void)
 				      task_fd))
 		goto close_prog;
 
-	err = copy_rm(tmp_exec_path);
-	if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno))
+	if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp",
+		  "unable to create tmpdir: %d\n", errno))
 		goto close_prog;
 
+	snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm",
+		 tmp_dir_path);
+	snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path);
+	if (CHECK_FAIL(system(cmd)))
+		goto close_prog_rmdir;
+
 	rm_fd = open(tmp_exec_path, O_RDONLY);
 	if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d",
 		  tmp_exec_path, rm_fd, errno))
-		goto close_prog;
+		goto close_prog_rmdir;
 
 	if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map),
 				      rm_fd))
-		goto close_prog;
+		goto close_prog_rmdir;
 
 	/* Sets skel->bss->monitored_pid to the pid of the forked child
 	 * forks a child process that executes tmp_exec_path and tries to
@@ -209,33 +162,36 @@ void test_test_local_storage(void)
 	 */
 	err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path);
 	if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err))
-		goto close_prog_unlink;
+		goto close_prog_rmdir;
 
 	/* Set the process being monitored to be the current process */
 	skel->bss->monitored_pid = getpid();
 
-	/* Remove the temporary created executable */
-	err = unlink(tmp_exec_path);
-	if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path,
-		  errno))
-		goto close_prog_unlink;
+	/* Move copy_of_rm to a new location so that it triggers the
+	 * inode_rename LSM hook with a new_dentry that has a NULL inode ptr.
+	 */
+	snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr",
+		 tmp_dir_path, tmp_dir_path);
+	if (CHECK_FAIL(system(cmd)))
+		goto close_prog_rmdir;
 
 	CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
 	      "inode_local_storage not set\n");
 
 	serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
 	if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
-		goto close_prog;
+		goto close_prog_rmdir;
 
 	CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
 	      "sk_local_storage not set\n");
 
 	if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map),
 				      serv_sk))
-		goto close_prog;
+		goto close_prog_rmdir;
 
-close_prog_unlink:
-	unlink(tmp_exec_path);
+close_prog_rmdir:
+	snprintf(cmd, sizeof(cmd), "rm -rf %s", tmp_dir_path);
+	system(cmd);
 close_prog:
 	close(serv_sk);
 	close(rm_fd);
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 3e3de130f28f303e30a45bff6501e502f905fa93..95868bc7ada9873efcbf9edb16b6bed5f8ec1b4c 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -50,7 +50,6 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
 	struct local_storage *storage;
 	bool is_self_unlink;
-	int err;
 
 	if (pid != monitored_pid)
 		return 0;
@@ -66,8 +65,27 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 			return -EPERM;
 	}
 
-	storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
-					BPF_LOCAL_STORAGE_GET_F_CREATE);
+	return 0;
+}
+
+SEC("lsm/inode_rename")
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+	     struct inode *new_dir, struct dentry *new_dentry,
+	     unsigned int flags)
+{
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	struct local_storage *storage;
+	int err;
+
+	/* new_dentry->d_inode can be NULL when the inode is renamed to a file
+	 * that did not exist before. The helper should be able to handle this
+	 * NULL pointer.
+	 */
+	bpf_inode_storage_get(&inode_storage_map, new_dentry->d_inode, 0,
+			      BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+	storage = bpf_inode_storage_get(&inode_storage_map, old_dentry->d_inode,
+					0, 0);
 	if (!storage)
 		return 0;
 
@@ -76,7 +94,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 		inode_storage_result = -1;
 	bpf_spin_unlock(&storage->lock);
 
-	err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+	err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode);
 	if (!err)
 		inode_storage_result = err;
 
@@ -133,37 +151,18 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 	return 0;
 }
 
-SEC("lsm/file_open")
-int BPF_PROG(file_open, struct file *file)
-{
-	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct local_storage *storage;
-
-	if (pid != monitored_pid)
-		return 0;
-
-	if (!file->f_inode)
-		return 0;
-
-	storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0,
-					BPF_LOCAL_STORAGE_GET_F_CREATE);
-	if (!storage)
-		return 0;
-
-	bpf_spin_lock(&storage->lock);
-	storage->value = DUMMY_STORAGE_VALUE;
-	bpf_spin_unlock(&storage->lock);
-	return 0;
-}
-
 /* This uses the local storage to remember the inode of the binary that a
  * process was originally executing.
  */
 SEC("lsm/bprm_committed_creds")
 void BPF_PROG(exec, struct linux_binprm *bprm)
 {
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
 	struct local_storage *storage;
 
+	if (pid != monitored_pid)
+		return;
+
 	storage = bpf_task_storage_get(&task_storage_map,
 				       bpf_get_current_task_btf(), 0,
 				       BPF_LOCAL_STORAGE_GET_F_CREATE);
@@ -172,4 +171,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm)
 		storage->exec_inode = bprm->file->f_inode;
 		bpf_spin_unlock(&storage->lock);
 	}
+
+	storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode,
+					0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return;
+
+	bpf_spin_lock(&storage->lock);
+	storage->value = DUMMY_STORAGE_VALUE;
+	bpf_spin_unlock(&storage->lock);
 }
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 777a81404fdbd15a9643151c108a51db7d4478f3..f8569f04064b7e9684e3a93e1e03ab1e2f13fa7c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	20
+#define MAX_NR_MAPS	21
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -87,6 +87,7 @@ struct bpf_test {
 	int fixup_sk_storage_map[MAX_FIXUPS];
 	int fixup_map_event_output[MAX_FIXUPS];
 	int fixup_map_reuseport_array[MAX_FIXUPS];
+	int fixup_map_ringbuf[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	uint32_t insn_processed;
@@ -640,6 +641,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_sk_storage_map = test->fixup_sk_storage_map;
 	int *fixup_map_event_output = test->fixup_map_event_output;
 	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
+	int *fixup_map_ringbuf = test->fixup_map_ringbuf;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -817,6 +819,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_reuseport_array++;
 		} while (*fixup_map_reuseport_array);
 	}
+	if (*fixup_map_ringbuf) {
+		map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
+					   0, 4096);
+		do {
+			prog[*fixup_map_ringbuf].imm = map_fds[20];
+			fixup_map_ringbuf++;
+		} while (*fixup_map_ringbuf);
+	}
 }
 
 struct libcap {
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 45d43bf82f269190bc51ab29f7032e909103dca0..0b943897aaf6c136a8fea40a3804042b28c44e37 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -28,6 +28,36 @@
 	.result = ACCEPT,
 	.result_unpriv = ACCEPT,
 },
+{
+	"check valid spill/fill, ptr to mem",
+	.insns = {
+	/* reserve 8 byte ringbuf memory */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_2, 8),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+	/* store a pointer to the reserved memory in R6 */
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	/* check whether the reservation was successful */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+	/* spill R6(mem) into the stack */
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+	/* fill it back in R7 */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+	/* should be able to access *(R7) = 0 */
+	BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+	/* submit the reserved ringbuf memory */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_ringbuf = { 1 },
+	.result = ACCEPT,
+	.result_unpriv = ACCEPT,
+},
 {
 	"check corrupted spill/fill",
 	.insns = {
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 84205c3a55ebed11ddbbf307d771338e0f4d1ac5..2b5707738609ef313e533c78492e25bbc6a02313 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1055,7 +1055,6 @@ ipv6_addr_metric_test()
 
 	check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
 	log_test $? 0 "Set metric with peer route on local side"
-	log_test $? 0 "User specified metric on local address"
 	check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
 	log_test $? 0 "Set metric with peer route on peer side"