diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 4cf981d70f45b621acbce1df4bf7097d27b6d85d..6979297ce278642c5b4c59844d626cddd7cfdcbd 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -425,15 +425,121 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
 		       : "eax", "edx", "memory");
 	return size;
 }
+
+/*
+ * Non Temporal Hint version of __copy_user_zeroing_intel.  It is cache aware.
+ * hyoshiok@miraclelinux.com
+ */
+
+static unsigned long __copy_user_zeroing_intel_nocache(void *to,
+				const void __user *from, unsigned long size)
+{
+        int d0, d1;
+
+	__asm__ __volatile__(
+	       "        .align 2,0x90\n"
+	       "0:      movl 32(%4), %%eax\n"
+	       "        cmpl $67, %0\n"
+	       "        jbe 2f\n"
+	       "1:      movl 64(%4), %%eax\n"
+	       "        .align 2,0x90\n"
+	       "2:      movl 0(%4), %%eax\n"
+	       "21:     movl 4(%4), %%edx\n"
+	       "        movnti %%eax, 0(%3)\n"
+	       "        movnti %%edx, 4(%3)\n"
+	       "3:      movl 8(%4), %%eax\n"
+	       "31:     movl 12(%4),%%edx\n"
+	       "        movnti %%eax, 8(%3)\n"
+	       "        movnti %%edx, 12(%3)\n"
+	       "4:      movl 16(%4), %%eax\n"
+	       "41:     movl 20(%4), %%edx\n"
+	       "        movnti %%eax, 16(%3)\n"
+	       "        movnti %%edx, 20(%3)\n"
+	       "10:     movl 24(%4), %%eax\n"
+	       "51:     movl 28(%4), %%edx\n"
+	       "        movnti %%eax, 24(%3)\n"
+	       "        movnti %%edx, 28(%3)\n"
+	       "11:     movl 32(%4), %%eax\n"
+	       "61:     movl 36(%4), %%edx\n"
+	       "        movnti %%eax, 32(%3)\n"
+	       "        movnti %%edx, 36(%3)\n"
+	       "12:     movl 40(%4), %%eax\n"
+	       "71:     movl 44(%4), %%edx\n"
+	       "        movnti %%eax, 40(%3)\n"
+	       "        movnti %%edx, 44(%3)\n"
+	       "13:     movl 48(%4), %%eax\n"
+	       "81:     movl 52(%4), %%edx\n"
+	       "        movnti %%eax, 48(%3)\n"
+	       "        movnti %%edx, 52(%3)\n"
+	       "14:     movl 56(%4), %%eax\n"
+	       "91:     movl 60(%4), %%edx\n"
+	       "        movnti %%eax, 56(%3)\n"
+	       "        movnti %%edx, 60(%3)\n"
+	       "        addl $-64, %0\n"
+	       "        addl $64, %4\n"
+	       "        addl $64, %3\n"
+	       "        cmpl $63, %0\n"
+	       "        ja  0b\n"
+	       "        sfence \n"
+	       "5:      movl  %0, %%eax\n"
+	       "        shrl  $2, %0\n"
+	       "        andl $3, %%eax\n"
+	       "        cld\n"
+	       "6:      rep; movsl\n"
+	       "        movl %%eax,%0\n"
+	       "7:      rep; movsb\n"
+	       "8:\n"
+	       ".section .fixup,\"ax\"\n"
+	       "9:      lea 0(%%eax,%0,4),%0\n"
+	       "16:     pushl %0\n"
+	       "        pushl %%eax\n"
+	       "        xorl %%eax,%%eax\n"
+	       "        rep; stosb\n"
+	       "        popl %%eax\n"
+	       "        popl %0\n"
+	       "        jmp 8b\n"
+	       ".previous\n"
+	       ".section __ex_table,\"a\"\n"
+	       "	.align 4\n"
+	       "	.long 0b,16b\n"
+	       "	.long 1b,16b\n"
+	       "	.long 2b,16b\n"
+	       "	.long 21b,16b\n"
+	       "	.long 3b,16b\n"
+	       "	.long 31b,16b\n"
+	       "	.long 4b,16b\n"
+	       "	.long 41b,16b\n"
+	       "	.long 10b,16b\n"
+	       "	.long 51b,16b\n"
+	       "	.long 11b,16b\n"
+	       "	.long 61b,16b\n"
+	       "	.long 12b,16b\n"
+	       "	.long 71b,16b\n"
+	       "	.long 13b,16b\n"
+	       "	.long 81b,16b\n"
+	       "	.long 14b,16b\n"
+	       "	.long 91b,16b\n"
+	       "	.long 6b,9b\n"
+	       "        .long 7b,16b\n"
+	       ".previous"
+	       : "=&c"(size), "=&D" (d0), "=&S" (d1)
+	       :  "1"(to), "2"(from), "0"(size)
+	       : "eax", "edx", "memory");
+	return size;
+}
+
 #else
+
 /*
  * Leave these declared but undefined.  They should not be any references to
  * them
  */
-unsigned long
-__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
-unsigned long
-__copy_user_intel(void __user *to, const void *from, unsigned long size);
+unsigned long __copy_user_zeroing_intel(void *to, const void __user *from,
+					unsigned long size);
+unsigned long __copy_user_intel(void __user *to, const void *from,
+					unsigned long size);
+unsigned long __copy_user_zeroing_intel_nocache(void *to,
+				const void __user *from, unsigned long size);
 #endif /* CONFIG_X86_INTEL_USERCOPY */
 
 /* Generic arbitrary sized copy.  */
@@ -515,8 +621,8 @@ do {									\
 		: "memory");						\
 } while (0)
 
-
-unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
+unsigned long __copy_to_user_ll(void __user *to, const void *from,
+				unsigned long n)
 {
 	BUG_ON((long) n < 0);
 #ifndef CONFIG_X86_WP_WORKS_OK
@@ -576,8 +682,8 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long
 }
 EXPORT_SYMBOL(__copy_to_user_ll);
 
-unsigned long
-__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
+unsigned long __copy_from_user_ll(void *to, const void __user *from,
+					unsigned long n)
 {
 	BUG_ON((long)n < 0);
 	if (movsl_is_ok(to, from, n))
@@ -588,6 +694,21 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n)
 }
 EXPORT_SYMBOL(__copy_from_user_ll);
 
+unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
+					unsigned long n)
+{
+	BUG_ON((long)n < 0);
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	if ( n > 64 && cpu_has_xmm2)
+                n = __copy_user_zeroing_intel_nocache(to, from, n);
+	else
+		__copy_user_zeroing(to, from, n);
+#else
+        __copy_user_zeroing(to, from, n);
+#endif
+	return n;
+}
+
 /**
  * copy_to_user: - Copy a block of data into user space.
  * @to:   Destination address, in user space.
diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
index 1ec65523ea5e750f0105095476112056061f9693..82af28a943ab1e376723c74ad1b43226639c7ccd 100644
--- a/include/asm-i386/uaccess.h
+++ b/include/asm-i386/uaccess.h
@@ -390,6 +390,8 @@ unsigned long __must_check __copy_to_user_ll(void __user *to,
 				const void *from, unsigned long n);
 unsigned long __must_check __copy_from_user_ll(void *to,
 				const void __user *from, unsigned long n);
+unsigned long __must_check __copy_from_user_ll_nocache(void *to,
+				const void __user *from, unsigned long n);
 
 /*
  * Here we special-case 1, 2 and 4-byte copy_*_user invocations.  On a fault
@@ -478,12 +480,43 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 	return __copy_from_user_ll(to, from, n);
 }
 
+#define ARCH_HAS_NOCACHE_UACCESS
+
+static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to,
+				const void __user *from, unsigned long n)
+{
+	if (__builtin_constant_p(n)) {
+		unsigned long ret;
+
+		switch (n) {
+		case 1:
+			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			return ret;
+		case 2:
+			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			return ret;
+		case 4:
+			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			return ret;
+		}
+	}
+	return __copy_from_user_ll_nocache(to, from, n);
+}
+
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        might_sleep();
        return __copy_from_user_inatomic(to, from, n);
 }
+
+static __always_inline unsigned long
+__copy_from_user_nocache(void *to, const void __user *from, unsigned long n)
+{
+       might_sleep();
+       return __copy_from_user_inatomic_nocache(to, from, n);
+}
+
 unsigned long __must_check copy_to_user(void __user *to,
 				const void *from, unsigned long n);
 unsigned long __must_check copy_from_user(void *to,
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
new file mode 100644
index 0000000000000000000000000000000000000000..391e7ed1eb3fc05ea2eeb8548e42381c8aefa7e5
--- /dev/null
+++ b/include/linux/uaccess.h
@@ -0,0 +1,22 @@
+#ifndef __LINUX_UACCESS_H__
+#define __LINUX_UACCESS_H__
+
+#include <asm/uaccess.h>
+
+#ifndef ARCH_HAS_NOCACHE_UACCESS
+
+static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
+				const void __user *from, unsigned long n)
+{
+	return __copy_from_user_inatomic(to, from, n);
+}
+
+static inline unsigned long __copy_from_user_nocache(void *to,
+				const void __user *from, unsigned long n)
+{
+	return __copy_from_user(to, from, n);
+}
+
+#endif		/* ARCH_HAS_NOCACHE_UACCESS */
+
+#endif		/* __LINUX_UACCESS_H__ */
diff --git a/mm/filemap.c b/mm/filemap.c
index 368678c2d53107d3cabf3c98a15bccaa37dfd3f5..807a463fd5ed69b1d39d5268dddf92464c81ed0f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/uaccess.h>
 #include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
@@ -38,7 +39,6 @@
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
 
-#include <asm/uaccess.h>
 #include <asm/mman.h>
 
 static ssize_t
@@ -1902,7 +1902,7 @@ __filemap_copy_from_user_iovec(char *vaddr,
 		int copy = min(bytes, iov->iov_len - base);
 
 		base = 0;
-		left = __copy_from_user_inatomic(vaddr, buf, copy);
+		left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
 		copied += copy;
 		bytes -= copy;
 		vaddr += copy;
diff --git a/mm/filemap.h b/mm/filemap.h
index 13793ba0ce172714355abee0ddd3291622370461..5683cde22055f39e86c5bb9f670e615007c24b7c 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -13,7 +13,7 @@
 #include <linux/highmem.h>
 #include <linux/uio.h>
 #include <linux/config.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 size_t
 __filemap_copy_from_user_iovec(char *vaddr,
@@ -34,13 +34,13 @@ filemap_copy_from_user(struct page *page, unsigned long offset,
 	int left;
 
 	kaddr = kmap_atomic(page, KM_USER0);
-	left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
+	left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
 	kunmap_atomic(kaddr, KM_USER0);
 
 	if (left != 0) {
 		/* Do it the slow way */
 		kaddr = kmap(page);
-		left = __copy_from_user(kaddr + offset, buf, bytes);
+		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
 		kunmap(page);
 	}
 	return bytes - left;