strlen.c

作者: 树袋熊老师 | 来源:发表于2017-04-28 00:42 被阅读0次
    /* Copyright (C) 1991-2017 Free Software Foundation, Inc.
       This file is part of the GNU C Library.
       Written by Torbjorn Granlund (tege@sics.se),
       with help from Dan Sahlin (dan@sics.se);
       commentary by Jim Blandy (jimb@ai.mit.edu).
    
       The GNU C Library is free software; you can redistribute it and/or
       modify it under the terms of the GNU Lesser General Public
       License as published by the Free Software Foundation; either
       version 2.1 of the License, or (at your option) any later version.
    
       The GNU C Library is distributed in the hope that it will be useful,
       but WITHOUT ANY WARRANTY; without even the implied warranty of
       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       Lesser General Public License for more details.
    
       You should have received a copy of the GNU Lesser General Public
       License along with the GNU C Library; if not, see
       <http://www.gnu.org/licenses/>.  */
    
    #include <string.h>
    #include <stdlib.h>
    
    #undef strlen
    
    #ifndef STRLEN
    # define STRLEN strlen
    #endif
    
    /* Return the length of the null-terminated string STR.  Scan for
       the null terminator quickly by testing four bytes at a time.  */
    size_t
    STRLEN (const char *str)
    {
      /* Return the number of bytes in STR before its terminating NUL.
         After a short byte-wise prologue the string is scanned one
         `unsigned long int' at a time (4 or 8 bytes).  */
      const size_t word_size = sizeof (unsigned long int);
      const char *p = str;
      const unsigned long int *wp;
      unsigned long int ones, highs;

      /* Prologue: step one byte at a time until P is a multiple of the
         word size, returning early if the terminator shows up first.  */
      while (((unsigned long int) p & (word_size - 1)) != 0)
        {
          if (*p == '\0')
            return p - str;
          ++p;
        }

      /* ONES has the lowest bit of every byte set (0x01 per byte) and
         HIGHS has the highest bit of every byte set (0x80 per byte).
         Start with the 32-bit patterns and widen them when the word is
         larger.  The widening shift is split into two 16-bit steps so
         that no warning is issued when long has only 32 bits.  */
      ones = 0x01010101L;
      highs = 0x80808080L;
      if (word_size > 4)
        {
          ones |= (ones << 16) << 16;
          highs |= (highs << 16) << 16;
        }
      /* Words wider than 8 bytes are not supported.  */
      if (word_size > 8)
        abort ();

      /* Main loop: examine a whole word per iteration.  For one byte B,
         (B - 1) & ~B & 0x80 is non-zero only when B == 0; applying the
         same expression to the full word flags words that may hold a
         zero byte.  */
      for (wp = (const unsigned long int *) p; ; ++wp)
        {
          unsigned long int w = *wp;

          if (((w - ones) & ~w & highs) == 0)
            continue;

          /* The word was flagged: look at its bytes in address order to
             find the terminator.  If none is zero, move on to the next
             word.  */
          const char *bytes = (const char *) wp;
          for (size_t i = 0; i < word_size; ++i)
            if (bytes[i] == '\0')
              return bytes - str + i;
        }
    }
    libc_hidden_builtin_def (strlen)
    

    上面是 glibc-2.25 中 strlen 函数的源代码。因为我用的平台是 64 位的,下面给出一个精简版本:省略了针对 32 位的判断,并加了一些注释,方便理解。

    /*
     * Return the length of the NUL-terminated string STR, not counting the
     * terminator.  This is a 64-bit-only simplification of the word-at-a-time
     * technique: bytes are checked individually until the pointer is 8-byte
     * aligned, then the string is scanned 8 bytes per step.
     */
    unsigned long long strlen (const char *str){
        const char *char_ptr = str;

        /*
         * `char_ptr & 7` extracts the low three bits of the address; it is
         * non-zero (001 .. 111) exactly when the pointer is not 8-byte
         * aligned, so this loop runs at most 7 times.
         */
        for (; (unsigned long long) char_ptr & 7; ++char_ptr)
            if (*char_ptr == 0)
                return char_ptr - str;

        /*
         * Continue from the aligned position reached above, not from `str`.
         * Starting at `str` would repeat the bytes already checked and would
         * issue misaligned 8-byte loads, which can straddle a page boundary
         * beyond the terminator.  An aligned 8-byte load never crosses a page.
         */
        for (const unsigned long long *longword_ptr = (const unsigned long long *) char_ptr;; ++longword_ptr){
            unsigned long long longword = *longword_ptr;

            /*
             * Zero-byte test.  For a single byte c:
             *   ~c & 0x80      is 0x80 only when the top bit of c is 0;
             *   (c - 1) & 0x80 is 0x80 only when the top bit of c - 1 is 1.
             * Both hold together only for c == 0 (0 - 1 wraps to 0xFF), so
             * (c - 1) & ~c & 0x80 is non-zero only when c is zero.  The same
             * expression is applied here to all 8 bytes of the word at once.
             */
            if ((longword - 0x0101010101010101ULL) & ~longword & 0x8080808080808080ULL){
                const char *cp = (const char *) longword_ptr;

                /* Locate the first zero byte within the flagged word. */
                for (unsigned long long i = 0; i < 8; ++i) {
                    if (cp[i] == 0)
                        return cp - str + i;
                }
            }
        }
    }
    

    有人喜欢的话,给点个赞

    相关文章

      网友评论

          本文标题:strlen.c

          本文链接:https://www.haomeiwen.com/subject/krxozttx.html