diff --git a/Documentation/trace/hwlat_detector.txt b/Documentation/trace/hwlat_detector.txt
index c02e8ef800cf514a5d3c1dbfdbbf8b8023e60f19..3207717a0d1ac4c6e251f091e1f7d2dda3056aef 100644
--- a/Documentation/trace/hwlat_detector.txt
+++ b/Documentation/trace/hwlat_detector.txt
@@ -69,5 +69,11 @@ in /sys/kernel/tracing:
 
  tracing_threshold	- minimum latency value to be considered (usecs)
  tracing_max_latency	- maximum hardware latency actually observed (usecs)
+ tracing_cpumask	- the CPUs to move the hwlat thread across
  hwlat_detector/width	- specified amount of time to spin within window (usecs)
  hwlat_detector/window	- amount of time between (width) runs (usecs)
+
+The hwlat detector's kernel thread migrates to the next CPU specified in
+tracing_cpumask between each window. To limit the migration, modify
+tracing_cpumask. To stop it for the current test, change the CPU affinity
+of the hwlat kernel thread (named [hwlatd]) directly.
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 08dfabe4e862a1ee91ceee917b91a03aef707837..65aab3914a561be97fa15f8d7d92c59c472957ca 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -42,6 +42,7 @@
 #include <linux/kthread.h>
 #include <linux/tracefs.h>
 #include <linux/uaccess.h>
+#include <linux/cpumask.h>
 #include <linux/delay.h>
 #include "trace.h"
 
@@ -211,6 +212,57 @@ static int get_sample(void)
 	return ret;
 }
 
+static struct cpumask save_cpumask;	/* backs current_mask in move_to_next_cpu() */
+static bool disable_migrate;		/* true: move_to_next_cpu() does nothing */
+
+/*
+ * move_to_next_cpu - pin the hwlat thread to the next CPU to sample on
+ *
+ * Cycles through the online CPUs in the tracer's tracing_cpumask. Stops
+ * migrating once the thread's affinity differs from the mask set here.
+ */
+static void move_to_next_cpu(void)
+{
+	static struct cpumask *current_mask;
+	struct trace_array *tr = hwlat_trace;
+	int next_cpu;
+
+	if (disable_migrate)
+		return;
+
+	/* Just pick the first CPU on first iteration */
+	if (!current_mask) {
+		current_mask = &save_cpumask;
+		get_online_cpus();
+		cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
+		put_online_cpus();
+		next_cpu = cpumask_first(current_mask);
+		goto set_affinity;
+	}
+
+	/* User changed our affinity: then stop migrating for this test */
+	if (!cpumask_equal(current_mask, &current->cpus_allowed))
+		goto disable;
+
+	get_online_cpus();
+	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
+	next_cpu = cpumask_next(smp_processor_id(), current_mask);
+	put_online_cpus();
+	if (next_cpu >= nr_cpu_ids)	/* ran off the end: wrap around */
+		next_cpu = cpumask_first(current_mask);
+
+ set_affinity:
+	if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */
+		goto disable;
+	cpumask_clear(current_mask);
+	cpumask_set_cpu(next_cpu, current_mask);
+	sched_setaffinity(0, current_mask);
+	return;
+
+ disable:
+	disable_migrate = true;
+}
+
 /*
  * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
  *
@@ -230,6 +282,8 @@ static int kthread_fn(void *data)
 
 	while (!kthread_should_stop()) {
 
+		move_to_next_cpu();
+
 		local_irq_disable();
 		get_sample();
 		local_irq_enable();
@@ -473,6 +527,7 @@ static int hwlat_tracer_init(struct trace_array *tr)
 
 	hwlat_trace = tr;
 
+	disable_migrate = false;
 	hwlat_data.count = 0;
 	tr->max_latency = 0;
 	save_tracing_thresh = tracing_thresh;