strstr
返回 haystack 字符串从 needle 第一次出现的位置开始到 haystack 结尾的字符串。
strstr( string $haystack, mixed $needle[, bool $before_needle = FALSE] ) : string
源码实现
/* {{{ proto string strstr(string haystack, string needle[, bool part])
Finds first occurrence of a string within another */
PHP_FUNCTION(strstr)
{
zval *needle;
zend_string *haystack;
const char *found = NULL;
char needle_char[2];
zend_long found_offset;
zend_bool part = 0;
ZEND_PARSE_PARAMETERS_START(2, 3)
Z_PARAM_STR(haystack)
Z_PARAM_ZVAL(needle)
Z_PARAM_OPTIONAL
Z_PARAM_BOOL(part)
ZEND_PARSE_PARAMETERS_END();
if (Z_TYPE_P(needle) == IS_STRING) {
if (!Z_STRLEN_P(needle)) {
php_error_docref(NULL, E_WARNING, "Empty needle");
RETURN_FALSE;
}
found = php_memnstr(ZSTR_VAL(haystack), Z_STRVAL_P(needle), Z_STRLEN_P(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
} else {
//如果 needle 不是一个字符串,那么它将被转化为整型并且作为字符的序号来使用。
if (php_needle_char(needle, needle_char) != SUCCESS) {
RETURN_FALSE;
}
needle_char[1] = 0;
php_error_docref(NULL, E_DEPRECATED,
"Non-string needles will be interpreted as strings in the future. " \
"Use an explicit chr() call to preserve the current behavior");
found = php_memnstr(ZSTR_VAL(haystack), needle_char, 1, ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
}
if (found) {
found_offset = found - ZSTR_VAL(haystack);
if (part) {
RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
} else {
RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
}
}
RETURN_FALSE;
}
/* }}} */
核心算法实现追踪 php.h 文件: php_memnstr
#define php_memnstr zend_memnstr
追踪 zend_operators.h : zend_memnstr
static zend_always_inline const char *
zend_memnstr(const char *haystack, const char *needle, size_t needle_len, const char *end)
{
const char *p = haystack;
const char ne = needle[needle_len-1];
ptrdiff_t off_p;
size_t off_s;
if (needle_len == 1) {
return (const char *)memchr(p, *needle, (end-p));
}
off_p = end - haystack;
off_s = (off_p > 0) ? (size_t)off_p : 0;
if (needle_len > off_s) {
return NULL;
}
//当输入的字符串小于 1024字符或查找字符串长度小于9 的时候,使用系统库 glibc memchr 查找
if (EXPECTED(off_s < 1024 || needle_len < 9)) { /* glibc memchr is faster when needle is too short */
end -= needle_len;
while (p <= end) {
if ((p = (const char *)memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
if (!memcmp(needle+1, p+1, needle_len-2)) {
return p;
}
}
if (p == NULL) {
return NULL;
}
p++;
}
return NULL;
} else {
//使用 Sunday 算法查找
return zend_memnstr_ex(haystack, needle, needle_len, end);
}
}
zend_memnstr_ex 实现
ZEND_API const char* ZEND_FASTCALL zend_memnstr_ex(const char *haystack, const char *needle, size_t needle_len, const char *end) /* {{{ */
{
unsigned int td[256];
register size_t i;
register const char *p;
if (needle_len == 0 || (end - haystack) < needle_len) {
return NULL;
}
//前缀表
zend_memnstr_ex_pre(td, needle, needle_len, 0);
p = haystack;
end -= needle_len;
while (p <= end) {
for (i = 0; i < needle_len; i++) {
if (needle[i] != p[i]) {
break;
}
}
if (i == needle_len) {
return p;
}
if (UNEXPECTED(p == end)) {
return NULL;
}
p += td[(unsigned char)(p[needle_len])];
}
return NULL;
}
/* }}} */
zend_memnstr_ex_pre 实现 (Sunday 字符串匹配算法,速度最快的字符串匹配算法,)
- 如果字母表与模式的长度相比较大,则算法平均执行O(n / m)比较
/*
* String matching - Sunday algorithm
* http://www.iti.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
*/
static zend_always_inline void zend_memnstr_ex_pre(unsigned int td[], const char *needle, size_t needle_len, int reverse) /* {{{ */ {
int i;
//生成256字符表
for (i = 0; i < 256; i++) {
td[i] = needle_len + 1;
}
if (reverse) {
for (i = needle_len - 1; i >= 0; i--){
// 整型 [ ASCII 码] => X
td[(unsigned char)needle[i]] = i + 1;
}
} else {
size_t i;
for (i = 0; i < needle_len; i++) {
td[(unsigned char)needle[i]] = (int)needle_len - i;
}
}
}
/* }}} */
网友评论