x86/i386: Use less assembly in strlen(), speed things up a bit
author: Alexey Dobriyan <[email protected]>
Sun, 11 Dec 2011 18:13:19 +0000 (21:13 +0300)
committer: Ingo Molnar <[email protected]>
Mon, 12 Dec 2011 17:33:42 +0000 (18:33 +0100)
The current i386 strlen() hardcodes a NOT/DEC sequence in its inline
assembly. DEC is reported to be suboptimal on Core2, so keep only the
REPNE SCASB sequence in assembly and let the compiler do the rest.

The difference in the generated code is shown below (MCORE2=y):

<strlen>:
push   %edi
mov    $0xffffffff,%ecx
mov    %eax,%edi
xor    %eax,%eax
repnz scas %es:(%edi),%al
not    %ecx

- dec    %ecx
- mov    %ecx,%eax
+ lea    -0x1(%ecx),%eax

pop    %edi
ret

Signed-off-by: Alexey Dobriyan <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Jan Beulich <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
arch/x86/lib/string_32.c

index 82004d2bf05e160bfa6faf15745f38eb4ad89f46..bd59090825dbabb96821c30c498d18cf2220857a 100644 (file)
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr);
 size_t strlen(const char *s)
 {
        int d0;
-       int res;
+       size_t res;
        asm volatile("repne\n\t"
-               "scasb\n\t"
-               "notl %0\n\t"
-               "decl %0"
+               "scasb"
                : "=c" (res), "=&D" (d0)
                : "1" (s), "a" (0), "0" (0xffffffffu)
                : "memory");
-       return res;
+       return ~res - 1;
 }
 EXPORT_SYMBOL(strlen);
 #endif