diff --git a/ChangeLog b/ChangeLog
index 313d8865f780c0cdb5a34fb5363d92a711100422..82a4fd092695a68c9d730cfdca65497db7f8658a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -3490,4 +3490,7 @@
 	  the ARMv7-M family contributed by Mike Smith.
 	* lib/strings/lib_vikmemcpy.c:  As an option, the larger but faster
 	  implemementation of memcpy from Daniel Vik is now available (this is
-	  from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
\ No newline at end of file
+	  from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
+	* lib/string/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a
+	  version of memset() optimized for speed.  By default, memset() is
+	  optimized for size.
diff --git a/Documentation/NuttShell.html b/Documentation/NuttShell.html
index ad204f5dc9772236d6525723db55f48da610a477..78a565107401e27482abb6950b34e0cb2260b16f 100644
--- a/Documentation/NuttShell.html
+++ b/Documentation/NuttShell.html
@@ -8,7 +8,7 @@
   <tr align="center" bgcolor="#e4e4e4">
     <td>
       <h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1>
-      <p>Last Updated: August 28, 2012</p>
+      <p>Last Updated: October 20, 2012</p>
     </td>
   </tr>
 </table>
diff --git a/Documentation/NuttxPortingGuide.html b/Documentation/NuttxPortingGuide.html
index a16032db5757771e2052e2da10e1a7f8b97308dc..e43ca8a2fa0eade64af17a0c8a6b41635b1dcd3c 100644
--- a/Documentation/NuttxPortingGuide.html
+++ b/Documentation/NuttxPortingGuide.html
@@ -12,7 +12,7 @@
       <h1><big><font color="#3c34ec">
         <i>NuttX RTOS Porting Guide</i>
       </font></big></h1>
-      <p>Last Updated: August 28, 2012</p>
+      <p>Last Updated: October 20, 2012</p>
     </td>
   </tr>
 </table>
@@ -4449,12 +4449,12 @@ build
   If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you make also select Daniel
   Vik's optimized implementation of <code>memcpy()</code>:
 </p>
-  <ul><li>
-    <code>CONFIG_MEMCPY_VIK</code>:
-    Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
-    See licensing information in the top-level <code>COPYING</code> file.
-    Default: <code>n</code>.
-  </li></ul>
+<ul><li>
+  <code>CONFIG_MEMCPY_VIK</code>:
+  Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
+  See licensing information in the top-level <code>COPYING</code> file.
+  Default: <code>n</code>.
+</li></ul>
 
 <p>
   And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available:
@@ -4474,6 +4474,15 @@ build
     Compiles memcpy for 64 bit architectures
 </li></ul>
 
+<p>
+  If <code>CONFIG_ARCH_MEMSET</code> is <b>not</b> selected, then the following option is also available:
+</p>
+<ul><li>
+  <code>CONFIG_MEMSET_OPTSPEED</code>:
+  Select this option to use a version of <code>memset()</code> optimized for speed.
+  Default: <code>memset()</code> is optimized for size.
+</li></ul>
+
 <li>
   <p>
     The architecture may provide custom versions of certain standard header files:
diff --git a/configs/README.txt b/configs/README.txt
index cc65540d7fe02b62722c67458b051e7385baf840..0bb531d67a4108a7c4d22bb03546e795655adf8b 100644
--- a/configs/README.txt
+++ b/configs/README.txt
@@ -638,6 +638,12 @@ defconfig -- This is a configuration file similar to the Linux
 
     CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures
 
+  If CONFIG_ARCH_MEMSET is not selected, then the following option is
+  also available:
+
+    CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset()
+      optimized for speed. Default: memset() is optimized for size.
+
   The architecture may provide custom versions of certain standard header
   files:
 
diff --git a/lib/Kconfig b/lib/Kconfig
index b3f743db28b05396d7ca9ca56bf78b46951e54e1..0f25c89238f13dbe233247b13b63fbc599315a13 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS
 
 if ARCH_OPTIMIZED_FUNCTIONS
 config ARCH_MEMCPY
-	bool "memcpy"
+	bool "memcpy()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
 		of memcpy().
 
 config MEMCPY_VIK
-	bool "Vik memcpy"
+	bool "Vik memcpy()"
 	default n
 	depends on !ARCH_MEMCPY
 	---help---
-	Select this option to use the optimized memcpy() function by Daniel Vik.
-	See licensing information in the top-level COPYING file.
+		Select this option to use the optimized memcpy() function by Daniel Vik.
+		Select this option to optimize for speed at the expense of increased size.
+		See licensing information in the top-level COPYING file.
 
 if MEMCPY_VIK
 config MEMCPY_PRE_INC_PTRS
@@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY
 		MEMCPY_PRE_INC_PTRS option.
 
 config MEMCPY_64BIT
-	bool "64-bit memcpy"
+	bool "64-bit memcpy()"
 	default n
 	---help---
-		Compiles memcpy for 64 bit architectures
+		Compiles memcpy() for 64 bit architectures
 
 endif
 
 config ARCH_MEMCMP
-	bool "memcmp"
+	bool "memcmp()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
 		of memcmp().
 
 config ARCH_MEMMOVE
-	bool "memmove"
+	bool "memmove()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
 		of memmove().
 
 config ARCH_MEMSET
-	bool "memset"
+	bool "memset()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
 		of memset().
 
+config MEMSET_OPTSPEED
+	bool "Optimize memset() for speed"
+	default n
+	depends on !ARCH_MEMSET
+	---help---
+		Select this option to use a version of memset() optimized for speed.
+		Default: memset() is optimized for size.
+
 config ARCH_STRCMP
-	bool "strcmp"
+	bool "strcmp()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
 		of strcmp().
 
 config ARCH_STRCPY
-	bool "strcpy"
+	bool "strcpy()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
 		of strcpy().
 
 config ARCH_STRNCPY
-	bool "strncpy"
+	bool "strncpy()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
@@ -239,14 +248,14 @@ config ARCH_STRLEN
 		of strlen().
 
 config ARCH_STRNLEN
-	bool "strlen"
+	bool "strnlen()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
 		of strnlen().
 
 config ARCH_BZERO
-	bool "bzero"
+	bool "bzero()"
 	default n
 	---help---
 		Select this option if the architecture provides an optimized version
diff --git a/lib/string/lib_memset.c b/lib/string/lib_memset.c
index 916351b974ab090ad1b9a94b646f2c228a7efb43..c910d2ce042f029a5f5a6b76ac85f48f8a331bd2 100644
--- a/lib/string/lib_memset.c
+++ b/lib/string/lib_memset.c
@@ -42,8 +42,12 @@
  ************************************************************/
 
 #include <nuttx/config.h>
+
 #include <sys/types.h>
+
+#include <stdint.h>
 #include <string.h>
+#include <assert.h>
 
 /************************************************************
  * Global Functions
@@ -52,8 +56,88 @@
 #ifndef CONFIG_ARCH_MEMSET
 void *memset(void *s, int c, size_t n)
 {
+#ifdef CONFIG_MEMSET_OPTSPEED
+  /* This version is optimized for speed (you could do better still by
+   * exploiting processor caching, memory burst knowledge, or 64-bit
+   * stores).  Only the low 8 bits of c are replicated into the fill
+   * patterns because memset() stores c converted to unsigned char.
+   */
+
+  uintptr_t addr  = (uintptr_t)s;
+  uint16_t  val16 = ((uint16_t)(uint8_t)c << 8) | (uint16_t)(uint8_t)c;
+  uint32_t  val32 = ((uint32_t)val16 << 16)  | (uint32_t)val16;
+
+  /* Make sure that there is something to be set */
+
+  if (n > 0)
+    {
+      /* Align to a 16-bit boundary */
+
+      if ((addr & 1) != 0)
+        {
+          *(uint8_t*)addr = (uint8_t)c;
+          addr += 1;
+          n    -= 1;
+        }
+
+      /* Check if there are at least 16-bits left to be set */
+
+      if (n >= 2)
+        {
+          /* Align to a 32-bit boundary (we know that the destination
+           * address is already aligned to at least a 16-bit boundary).
+           */
+
+          if ((addr & 3) != 0)
+            {
+              *(uint16_t*)addr = val16;
+              addr += 2;
+              n    -= 2;
+            }
+
+          /* Loop while there are at least 32-bits left to be set */
+
+          while (n >= 4)
+            {
+              *(uint32_t*)addr = val32;
+              addr += 4;
+              n    -= 4;
+            }
+        }
+
+      /* We may get here under the following conditions:
+       *
+       *   n = 0, addr may or may not be aligned
+       *   n = 1, addr may or may not be aligned
+       *   n = 2, addr is aligned to a 32-bit boundary
+       *   n = 3, addr is aligned to a 32-bit boundary
+       */
+
+      switch (n)
+        {
+          default:
+          case 0:
+            DEBUGASSERT(n == 0);
+            break;
+
+          case 2:
+            *(uint16_t*)addr = val16;
+            break;
+
+          case 3:
+            *(uint16_t*)addr = val16;
+            addr += 2; /* Fall through to set the final byte */
+          case 1:
+            *(uint8_t*)addr = (uint8_t)c;
+            break;
+        }
+    }
+#else
+  /* This version is optimized for size */
+
   unsigned char *p = (unsigned char*)s;
   while (n-- > 0) *p++ = c;
+#endif
   return s;
 }
 #endif