美文网首页
PHP7 内核源码 strstr 查找字符串: Sunday 字

PHP7 内核源码 strstr 查找字符串: Sunday 字

作者: 过往云技 | 来源:发表于2019-04-07 17:21 被阅读0次

strstr

返回 haystack 字符串从 needle 第一次出现的位置开始到 haystack 结尾的字符串。
strstr( string $haystack, mixed $needle[, bool $before_needle = FALSE] ) : string

源码实现

/* {{{ proto string strstr(string haystack, string needle[, bool part])
   Finds first occurrence of a string within another */
PHP_FUNCTION(strstr)
{
    zval *needle;
    zend_string *haystack;
    const char *found = NULL;
    char needle_char[2];
    zend_long found_offset;
    zend_bool part = 0;

    ZEND_PARSE_PARAMETERS_START(2, 3)
        Z_PARAM_STR(haystack)
        Z_PARAM_ZVAL(needle)
        Z_PARAM_OPTIONAL
        Z_PARAM_BOOL(part)
    ZEND_PARSE_PARAMETERS_END();

    if (Z_TYPE_P(needle) == IS_STRING) { 
        if (!Z_STRLEN_P(needle)) {
            php_error_docref(NULL, E_WARNING, "Empty needle");
            RETURN_FALSE;
        }

        found = php_memnstr(ZSTR_VAL(haystack), Z_STRVAL_P(needle), Z_STRLEN_P(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
    } else {
        //如果 needle 不是一个字符串,那么它将被转化为整型并且作为字符的序号来使用。
        if (php_needle_char(needle, needle_char) != SUCCESS) {
            RETURN_FALSE;
        }
        needle_char[1] = 0;

        php_error_docref(NULL, E_DEPRECATED,
            "Non-string needles will be interpreted as strings in the future. " \
            "Use an explicit chr() call to preserve the current behavior");

        found = php_memnstr(ZSTR_VAL(haystack), needle_char, 1, ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
    }

    if (found) {
        found_offset = found - ZSTR_VAL(haystack);
        if (part) {
            RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
        } else {
            RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
        }
    }
    RETURN_FALSE;
}
/* }}} */

核心算法实现追踪 php.h 文件: php_memnstr

#define php_memnstr zend_memnstr

追踪 zend_operators.h : zend_memnstr

static zend_always_inline const char *
zend_memnstr(const char *haystack, const char *needle, size_t needle_len, const char *end)
{
    const char *p = haystack;
    const char ne = needle[needle_len-1];
    ptrdiff_t off_p;
    size_t off_s;

    if (needle_len == 1) {
        return (const char *)memchr(p, *needle, (end-p));
    }

    off_p = end - haystack;
    off_s = (off_p > 0) ? (size_t)off_p : 0;

    if (needle_len > off_s) {
        return NULL;
    }
    //当输入的字符串小于 1024字符或查找字符串长度小于9 的时候,使用系统库 glibc  memchr 查找
    if (EXPECTED(off_s < 1024 || needle_len < 9)) { /* glibc memchr is faster when needle is too short */
        end -= needle_len;

        while (p <= end) {
            if ((p = (const char *)memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
                if (!memcmp(needle+1, p+1, needle_len-2)) {
                    return p;
                }
            }

            if (p == NULL) {
                return NULL;
            }

            p++;
        }

        return NULL;
    } else {
        //使用 Sunday 算法查找
        return zend_memnstr_ex(haystack, needle, needle_len, end);
    }
}

zend_memnstr_ex 实现

ZEND_API const char* ZEND_FASTCALL zend_memnstr_ex(const char *haystack, const char *needle, size_t needle_len, const char *end) /* {{{ */
{
    unsigned int td[256];
    register size_t i;
    register const char *p;

    if (needle_len == 0 || (end - haystack) < needle_len) {
        return NULL;
    }
    //前缀表
    zend_memnstr_ex_pre(td, needle, needle_len, 0);

    p = haystack;
    end -= needle_len;

    while (p <= end) {
        for (i = 0; i < needle_len; i++) {
            if (needle[i] != p[i]) {
                break;
            }
        }
        if (i == needle_len) {
            return p;
        }
        if (UNEXPECTED(p == end)) {
            return NULL;
        }
        p += td[(unsigned char)(p[needle_len])];
    }

    return NULL;
}
/* }}} */

zend_memnstr_ex_pre 实现 (Sunday 字符串匹配算法,速度最快的字符串匹配算法,)

  • 如果字母表与模式的长度相比较大,则算法平均执行O(n / m)比较
/*
 * String matching - Sunday algorithm
 * http://www.iti.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
 */
static zend_always_inline void zend_memnstr_ex_pre(unsigned int td[], const char *needle, size_t needle_len, int reverse) /* {{{ */ {
    int i;
    //生成256字符表
    for (i = 0; i < 256; i++) {
        td[i] = needle_len + 1;
    }

    if (reverse) {
        for (i = needle_len - 1; i >= 0; i--){
            // 整型 [ ASCII 码] => X
            td[(unsigned char)needle[i]] = i + 1;
        }
    } else {
        size_t i;

        for (i = 0; i < needle_len; i++) {
            td[(unsigned char)needle[i]] = (int)needle_len - i;
        }
    }
}
/* }}} */

字符串匹配算法——Sunday(PHP实现)

相关文章

网友评论

      本文标题:PHP7 内核源码 strstr 查找字符串: Sunday 字

      本文链接:https://www.haomeiwen.com/subject/bsqjiqtx.html