diff --git a/arch/arm/mach-sti/headsmp.S b/arch/arm/mach-sti/headsmp.S
index 4c09bae86edf02cb4f9408821fea25cd84d9dddb..e0ad451700d5ebe9226b0ac49922e31fc9802e2b 100644
--- a/arch/arm/mach-sti/headsmp.S
+++ b/arch/arm/mach-sti/headsmp.S
@@ -37,6 +37,7 @@ pen:	ldr	r7, [r6]
 	 * should now contain the SVC stack for this core
 	 */
 	b	secondary_startup
+ENDPROC(sti_secondary_startup)
 
 1:	.long	.
 	.long	pen_release
diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c
index d4b624f8dfcb0dc1bcf2f9342983faa079d33b4a..c4ad6eae67faf52b785fa822b548c11ea076db38 100644
--- a/arch/arm/mach-sti/platsmp.c
+++ b/arch/arm/mach-sti/platsmp.c
@@ -20,6 +20,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/memblock.h>
 
 #include <asm/cacheflush.h>
 #include <asm/smp_plat.h>
@@ -38,8 +39,6 @@ static DEFINE_SPINLOCK(boot_lock);
 
 static void sti_secondary_init(unsigned int cpu)
 {
-	trace_hardirqs_off();
-
 	/*
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
@@ -99,14 +98,62 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
 
 static void __init sti_smp_prepare_cpus(unsigned int max_cpus)
 {
-	void __iomem *scu_base = NULL;
-	struct device_node *np = of_find_compatible_node(
-					NULL, NULL, "arm,cortex-a9-scu");
+	struct device_node *np;
+	void __iomem *scu_base;
+	u32 __iomem *cpu_strt_ptr;
+	u32 release_phys;
+	int cpu;
+	unsigned long entry_pa = virt_to_phys(sti_secondary_startup);
+
+	np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-scu");
+
 	if (np) {
 		scu_base = of_iomap(np, 0);
 		scu_enable(scu_base);
 		of_node_put(np);
 	}
+
+	if (max_cpus <= 1)
+		return;
+
+	for_each_possible_cpu(cpu) {
+
+		np = of_get_cpu_node(cpu, NULL);
+
+		if (!np)
+			continue;
+
+		if (of_property_read_u32(np, "cpu-release-addr",
+						&release_phys)) {
+			pr_err("CPU %d: missing or invalid cpu-release-addr "
+				"property\n", cpu);
+			continue;
+		}
+
+		/*
+		 * holding pen is usually configured in SBC DMEM but can also be
+		 * in RAM.
+		 */
+
+		if (!memblock_is_memory(release_phys))
+			cpu_strt_ptr =
+				ioremap(release_phys, sizeof(release_phys));
+		else
+			cpu_strt_ptr =
+				(u32 __iomem *)phys_to_virt(release_phys);
+
+		__raw_writel(entry_pa, cpu_strt_ptr);
+
+		/*
+		 * wmb so that data is actually written
+		 * before cache flush is done
+		 */
+		smp_wmb();
+		sync_cache_w(cpu_strt_ptr);
+
+		if (!memblock_is_memory(release_phys))
+			iounmap(cpu_strt_ptr);
+	}
 }
 
 struct smp_operations __initdata sti_smp_ops = {
diff --git a/arch/arm/mach-sti/smp.h b/arch/arm/mach-sti/smp.h
index 1871b72b1a7e6cd4378f468d9955327bcb78fb97..ae22707d301fa1c2070ca03dafae1aa3e0a59d16 100644
--- a/arch/arm/mach-sti/smp.h
+++ b/arch/arm/mach-sti/smp.h
@@ -14,4 +14,6 @@
 
 extern struct smp_operations	sti_smp_ops;
 
+void sti_secondary_startup(void);
+
 #endif