diff --git a/ChangeLog b/ChangeLog index 313d8865f780c0cdb5a34fb5363d92a711100422..82a4fd092695a68c9d730cfdca65497db7f8658a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3490,4 +3490,7 @@ the ARMv7-M family contributed by Mike Smith. * lib/strings/lib_vikmemcpy.c: As an option, the larger but faster implemementation of memcpy from Daniel Vik is now available (this is - from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html). \ No newline at end of file + from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html). + * lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a + version of memset() optimized for speed. By default, memset() is + optimized for size. diff --git a/Documentation/NuttShell.html b/Documentation/NuttShell.html index ad204f5dc9772236d6525723db55f48da610a477..78a565107401e27482abb6950b34e0cb2260b16f 100644 --- a/Documentation/NuttShell.html +++ b/Documentation/NuttShell.html @@ -8,7 +8,7 @@ <tr align="center" bgcolor="#e4e4e4"> <td> <h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1> - <p>Last Updated: August 28, 2012</p> + <p>Last Updated: October 20, 2012</p> </td> </tr> </table> diff --git a/Documentation/NuttxPortingGuide.html b/Documentation/NuttxPortingGuide.html index a16032db5757771e2052e2da10e1a7f8b97308dc..e43ca8a2fa0eade64af17a0c8a6b41635b1dcd3c 100644 --- a/Documentation/NuttxPortingGuide.html +++ b/Documentation/NuttxPortingGuide.html @@ -12,7 +12,7 @@ <h1><big><font color="#3c34ec"> <i>NuttX RTOS Porting Guide</i> </font></big></h1> - <p>Last Updated: August 28, 2012</p> + <p>Last Updated: October 20, 2012</p> </td> </tr> </table> @@ -4449,12 +4449,12 @@ build If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you make also select Daniel Vik's optimized implementation of <code>memcpy()</code>: </p> - <ul><li> - <code>CONFIG_MEMCPY_VIK</code>: - Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik. 
- See licensing information in the top-level <code>COPYING</code> file. - Default: <code>n</code>. - </li></ul> +<ul><li> + <code>CONFIG_MEMCPY_VIK</code>: + Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik. + See licensing information in the top-level <code>COPYING</code> file. + Default: <code>n</code>. +</li></ul> <p> And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available: @@ -4474,6 +4474,15 @@ build Compiles memcpy for 64 bit architectures </li></ul> +<p> + If <code>CONFIG_ARCH_MEMSET</code> is <b>not</b> selected, then the following option is also available: +</p> +<ul><li> + <code>CONFIG_MEMSET_OPTSPEED</code>: + Select this option to use a version of <code>memset()</code> optimized for speed. + Default: <code>memset()</code> is optimized for size. +</li></ul> + <li> <p> The architecture may provide custom versions of certain standard header files: diff --git a/configs/README.txt b/configs/README.txt index cc65540d7fe02b62722c67458b051e7385baf840..0bb531d67a4108a7c4d22bb03546e795655adf8b 100644 --- a/configs/README.txt +++ b/configs/README.txt @@ -638,6 +638,12 @@ defconfig -- This is a configuration file similar to the Linux CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures + If CONFIG_ARCH_MEMSET is not selected, then the following option is + also available: + + CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset() + optimized for speed. Default: memset() is optimized for size. + The architecture may provide custom versions of certain standard header files: diff --git a/lib/Kconfig b/lib/Kconfig index b3f743db28b05396d7ca9ca56bf78b46951e54e1..0f25c89238f13dbe233247b13b63fbc599315a13 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS if ARCH_OPTIMIZED_FUNCTIONS config ARCH_MEMCPY - bool "memcpy" + bool "memcpy()" default n ---help--- Select this option if the architecture provides an optimized version of memcpy(). 
config MEMCPY_VIK - bool "Vik memcpy" + bool "Vik memcpy()" default n depends on !ARCH_MEMCPY ---help--- - Select this option to use the optimized memcpy() function by Daniel Vik. - See licensing information in the top-level COPYING file. + Select this option to use the optimized memcpy() function by Daniel Vik. + Select this option to optimize for speed at the expense of increased size. + See licensing information in the top-level COPYING file. if MEMCPY_VIK config MEMCPY_PRE_INC_PTRS @@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY MEMCPY_PRE_INC_PTRS option. config MEMCPY_64BIT - bool "64-bit memcpy" + bool "64-bit memcpy()" default n ---help--- - Compiles memcpy for 64 bit architectures + Compiles memcpy() for 64 bit architectures endif config ARCH_MEMCMP - bool "memcmp" + bool "memcmp()" default n ---help--- Select this option if the architecture provides an optimized version of memcmp(). config ARCH_MEMMOVE - bool "memmove" + bool "memmove()" default n ---help--- Select this option if the architecture provides an optimized version of memmove(). config ARCH_MEMSET - bool "memset" + bool "memset()" default n ---help--- Select this option if the architecture provides an optimized version of memset(). +config MEMSET_OPTSPEED + bool "Optimize memset() for speed" + default n + depends on !ARCH_MEMSET + ---help--- + Select this option to use a version of memset() optimized for speed. + Default: memset() is optimized for size. + config ARCH_STRCMP - bool "strcmp" + bool "strcmp()" default n ---help--- Select this option if the architecture provides an optimized version of strcmp(). config ARCH_STRCPY - bool "strcpy" + bool "strcpy()" default n ---help--- Select this option if the architecture provides an optimized version of strcpy(). config ARCH_STRNCPY - bool "strncpy" + bool "strncpy()" default n ---help--- Select this option if the architecture provides an optimized version @@ -239,14 +248,14 @@ config ARCH_STRLEN of strlen(). 
config ARCH_STRNLEN - bool "strlen" + bool "strnlen()" default n ---help--- Select this option if the architecture provides an optimized version of strnlen(). config ARCH_BZERO - bool "bzero" + bool "bzero()" default n ---help--- Select this option if the architecture provides an optimized version diff --git a/lib/string/lib_memset.c b/lib/string/lib_memset.c index 916351b974ab090ad1b9a94b646f2c228a7efb43..c910d2ce042f029a5f5a6b76ac85f48f8a331bd2 100644 --- a/lib/string/lib_memset.c +++ b/lib/string/lib_memset.c @@ -42,8 +42,12 @@ ************************************************************/ #include <nuttx/config.h> + #include <sys/types.h> + +#include <stdint.h> #include <string.h> +#include <assert.h> /************************************************************ * Global Functions @@ -52,8 +56,88 @@ #ifndef CONFIG_ARCH_MEMSET void *memset(void *s, int c, size_t n) { +#ifdef CONFIG_MEMSET_OPTSPEED + /* This version is optimized for speed (you could do better + * still by exploiting processor caching or memory burst + * knowledge. 64-bit support might improve performance as + * well). NOTE(review): c is not truncated to 8 bits before it is replicated into val16/val32, so a c outside 0..255 (e.g. a negative value) gives the wide stores a different pattern than the (uint8_t)c byte stores. + */ + + uintptr_t addr = (uintptr_t)s; + uint16_t val16 = ((uint16_t)c << 8) | (uint16_t)c; + uint32_t val32 = ((uint32_t)val16 << 16) | (uint32_t)val16; + + /* Make sure that there is something to be set */ + + if (n > 0) + { + /* Align to a 16-bit boundary */ + + if ((addr & 1) != 0) + { + *(uint8_t*)addr = (uint8_t)c; + addr += 1; + n -= 1; + } + + /* Check if there are at least 16-bits left to be set */ + + if (n >= 2) + { + /* Align to a 32-bit boundary (we know that the destination + * address is already aligned to at least a 16-bit boundary). 
+ */ + + if ((addr & 3) != 0) + { + *(uint16_t*)addr = val16; + addr += 2; + n -= 2; + } + + /* Loop while there are at least 32-bits left to be set */ + + while (n >= 4) + { + *(uint32_t*)addr = val32; + addr += 4; + n -= 4; + } + } + + /* We may get here under the following conditions: + * + * n = 0, addr may or may not be aligned + * n = 1, addr may or may not be aligned + * n = 2, addr is aligned to a 32-bit boundary + * n = 3, addr is aligned to a 32-bit boundary + */ + + switch (n) + { + default: + case 0: + DEBUGASSERT(n == 0); + break; + + case 2: + *(uint16_t*)addr = val16; + break; + + case 3: + *(uint16_t*)addr = val16; + addr += 2; + case 1: + *(uint8_t*)addr = (uint8_t)c; + break; + } + } +#else + /* This version is optimized for size */ + unsigned char *p = (unsigned char*)s; while (n-- > 0) *p++ = c; +#endif return s; } #endif