❀ str_c:字符串拼接,与R语言自带的paste和paste0函数具有相似的作用
str_c(..., sep = "", collapse = NULL)
library(stringr)
# 默认不使用分隔符(sep = "")进行拼接
str_c("Letter: ", letters)
## [1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e" "Letter: f" "Letter: g" "Letter: h" "Letter: i"
## [10] "Letter: j" "Letter: k" "Letter: l" "Letter: m" "Letter: n" "Letter: o" "Letter: p" "Letter: q" "Letter: r"
## [19] "Letter: s" "Letter: t" "Letter: u" "Letter: v" "Letter: w" "Letter: x" "Letter: y" "Letter: z"
str_c(letters, " is for", "...")
## [1] "a is for..." "b is for..." "c is for..." "d is for..." "e is for..." "f is for..." "g is for..."
## [8] "h is for..." "i is for..." "j is for..." "k is for..." "l is for..." "m is for..." "n is for..."
## [15] "o is for..." "p is for..." "q is for..." "r is for..." "s is for..." "t is for..." "u is for..."
## [22] "v is for..." "w is for..." "x is for..." "y is for..." "z is for..."
# 指定向量分隔符
str_c("Letter", letters, sep = ": ")
## [1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e" "Letter: f" "Letter: g" "Letter: h" "Letter: i"
## [10] "Letter: j" "Letter: k" "Letter: l" "Letter: m" "Letter: n" "Letter: o" "Letter: p" "Letter: q" "Letter: r"
## [19] "Letter: s" "Letter: t" "Letter: u" "Letter: v" "Letter: w" "Letter: x" "Letter: y" "Letter: z"
str_c(letters, collapse = ", ")
## [1] "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z"
# 与 paste0() 的区别:str_c() 遇到 NA 时结果仍为 NA,而 paste0() 会把 NA 当作字符串 "NA" 拼接
str_c(c("a", NA, "b"), "-d")
## [1] "a-d" NA "b-d"
paste0(c("a", NA, "b"), "-d")
## [1] "a-d" "NA-d" "b-d"
# 使用 str_replace_na() 将 NA 转换为字符串 "NA" 后再拼接
str_c(str_replace_na(c("a", NA, "b")), "-d")
## [1] "a-d" "NA-d" "b-d"
★ collapse:可选字符串;若指定,则用它把结果向量合并为单个字符串
★ str_c() 类似于 paste0(),但采用 tidyverse 的循环补齐(recycling)规则和 NA 处理规则(NA 会传播)
❀ str_length:计算长度/宽度
str_length(string) # 返回字符串中代码点的数量。
str_length(letters)
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
str_length(NA)
## [1] NA
str_length(factor("abc"))
## [1] 3
str_length(c("i", "like", "programming", NA))
## [1] 1 4 11 NA
❀ str_subset:查找匹配元素
str_subset(string, pattern, negate = FALSE)
fruit <- c("apple", "banana", "pear", "pineapple")
str_subset(fruit, "a")
## [1] "apple" "banana" "pear" "pineapple"
str_subset(fruit, "^a")
## [1] "apple"
str_subset(fruit, "a$")
## [1] "banana"
str_subset(fruit, "b")
## [1] "banana"
str_subset(fruit, "[aeiou]")
## [1] "apple" "banana" "pear" "pineapple"
# 不匹配的元素
str_subset(fruit, "^p", negate = TRUE)
## [1] "apple" "banana"
★ negate:若为 TRUE,则返回不匹配的元素
❀ str_sub:使用它们的位置获取和设置子串
str_sub(string, start = 1L, end = -1L)
str_sub(string, start = 1L, end = -1L, omit_na = FALSE) <- value
str_sub_all(string, start = 1L, end = -1L)
hw <- "Hadley Wickham"
str_sub(hw, 1, 6) # 1和6分别是子串的起始和终止位置
## [1] "Hadley"
str_sub(hw, end = 6)
## [1] "Hadley"
str_sub(hw, 8)
## [1] "Wickham"
# 正数表示从前往后数,负数表示从后往前数
str_sub(hw, -7)
## [1] "Wickham"
str_sub(hw, end = -7)
## [1] "Hadley W"
# 1和8分别是两个子集的起始位置,6和14分别是两个子集的终止位置
str_sub(hw, c(1, 8), c(6, 14))
## [1] "Hadley" "Wickham"
# 若想从多个字符串中提取多个位置,使用str_sub_all()
x <- c("abcde", "ghifgh")
str_sub(x, c(1, 2), c(2, 4))
## [1] "ab" "hif"
str_sub_all(x, start = c(1, 2), end = c(2, 4))
## [[1]]
## [1] "ab" "bcd"
##
## [[2]]
## [1] "gh" "hif"
# 使用str_sub()函数的赋值形式来修改字符串
x <- "BBCDEF"
str_sub(x, 1, 1) <- "A"; x
## [1] "ABCDEF"
str_sub(x, -1, -1) <- "K"; x
## [1] "ABCDEK"
str_sub(x, -2, -2) <- "GHIJ"; x
## [1] "ABCDGHIJK"
str_sub(x, 2, -2) <- ""; x
## [1] "AK"
❀ str_locate:查找匹配的位置
str_locate(string, pattern)
str_locate_all(string, pattern)
fruit <- c("apple", "banana", "pear", "pineapple")
# 返回一个两列(start、end)的整数矩阵,每个输入元素对应一行,给出其第一个匹配的起止位置
str_locate(fruit, "$")
## start end
## [1,] 6 5
## [2,] 7 6
## [3,] 5 4
## [4,] 10 9
str_locate(fruit, "e")
## start end
## [1,] 5 5
## [2,] NA NA
## [3,] 2 2
## [4,] 4 4
str_locate(fruit, c("a", "b", "p", "p"))
## start end
## [1,] 1 1
## [2,] 1 1
## [3,] 1 1
## [4,] 1 1
# 返回一个矩阵列表:每个输入元素对应一个矩阵,给出其所有匹配的起止位置
str_locate_all(fruit, "a")
## [[1]]
## start end
## [1,] 1 1
##
## [[2]]
## start end
## [1,] 2 2
## [2,] 4 4
## [3,] 6 6
##
## [[3]]
## start end
## [1,] 3 3
##
## [[4]]
## start end
## [1,] 5 5
str_locate_all(fruit, c("a", "b", "p", "p"))
## [[1]]
## start end
## [1,] 1 1
##
## [[2]]
## start end
## [1,] 1 1
##
## [[3]]
## start end
## [1,] 1 1
##
## [[4]]
## start end
## [1,] 1 1
## [2,] 6 6
## [3,] 7 7
❀ str_extract:字符串提取
str_extract(string, pattern, group = NULL)
str_extract_all(string, pattern, simplify = FALSE)
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
str_extract(shopping_list, "\\d") # \\d 匹配一个数字字符, 等价于 [0-9]
## [1] "4" NA NA "2"
str_extract(shopping_list, "[a-z]+")
## [1] "apples" "bag" "bag" "milk"
str_extract(shopping_list, "[a-z]{1,4}")
## [1] "appl" "bag" "bag" "milk"
str_extract(shopping_list, "\\b[a-z]{1,4}\\b") # \\b 匹配单词边界
## [1] NA "bag" "bag" "milk"
str_extract(shopping_list, "([a-z]+) of ([a-z]+)")
## [1] NA "bag of flour" "bag of sugar" NA
# 提取所有匹配项
str_extract_all(shopping_list, "[a-z]+")
## [[1]]
## [1] "apples" "x"
##
## [[2]]
## [1] "bag" "of" "flour"
##
## [[3]]
## [1] "bag" "of" "sugar"
##
## [[4]]
## [1] "milk" "x"
str_extract_all(shopping_list, "\\b[a-z]+\\b")
## [[1]]
## [1] "apples"
##
## [[2]]
## [1] "bag" "of" "flour"
##
## [[3]]
## [1] "bag" "of" "sugar"
##
## [[4]]
## [1] "milk"
str_extract_all(shopping_list, "\\d")
## [[1]]
## [1] "4"
##
## [[2]]
## character(0)
##
## [[3]]
## character(0)
##
## [[4]]
## [1] "2"
# 将结果简化为字符矩阵
str_extract_all(shopping_list, "\\b[a-z]+\\b", simplify = TRUE)
## [,1] [,2] [,3]
## [1,] "apples" "" ""
## [2,] "bag" "of" "flour"
## [3,] "bag" "of" "sugar"
## [4,] "milk" "" ""
str_extract_all(shopping_list, "\\d", simplify = TRUE)
## [,1]
## [1,] "4"
## [2,] ""
## [3,] ""
## [4,] "2"
❀ str_dup:字符串复制
str_dup(string, times) # times表示重复的次数
fruit <- c("apple", "pear", "banana")
str_dup(fruit, 2)
## [1] "appleapple" "pearpear" "bananabanana"
str_dup(fruit, 1:3)
## [1] "apple" "pearpear" "bananabananabanana"
str_c("ba", str_dup("na", 0:5))
## [1] "ba" "bana" "banana" "bananana" "banananana" "bananananana"
❀ str_count:字符串计数
str_count(string, pattern = "")
fruit <- c("apple", "banana", "pear", "pineapple")
str_count(fruit, "a")
## [1] 1 3 1 1
str_count(fruit, c("a", "b", "p", "p"))
## [1] 1 1 1 3
❀ str_split:字符串分割
str_split(string, pattern, n = Inf, simplify = FALSE) # 接受一个字符向量并返回一个列表
str_split_1(string, pattern) # 接受一个字符串并返回一个字符向量
str_split_fixed(string, pattern, n) # 接受一个字符向量并返回一个矩阵
str_split_i(string, pattern, i) # 接受一个字符向量并返回一个字符向量
fruits <- c(
"apples and oranges and pears and bananas",
"pineapples and mangos and guavas"
)
str_split(fruits, " and ")
## [[1]]
## [1] "apples" "oranges" "pears" "bananas"
##
## [[2]]
## [1] "pineapples" "mangos" "guavas"
str_split(fruits, " and ", simplify = TRUE)
## [,1] [,2] [,3] [,4]
## [1,] "apples" "oranges" "pears" "bananas"
## [2,] "pineapples" "mangos" "guavas" ""
# 拆分单个字符串
str_split_1(fruits[[1]], " and ")
## [1] "apples" "oranges" "pears" "bananas"
# 指定 n 以限制可能匹配的数量
str_split(fruits, " and ", n = 3)
## [[1]]
## [1] "apples" "oranges" "pears and bananas"
##
## [[2]]
## [1] "pineapples" "mangos" "guavas"
# 若 n 大于实际拆分出的片段数,则不会进行填充
str_split(fruits, " and ", n = 5)
## [[1]]
## [1] "apples" "oranges" "pears" "bananas"
##
## [[2]]
## [1] "pineapples" "mangos" "guavas"
# 使用 str_split_fixed() 返回一个字符矩阵
str_split_fixed(fruits, " and ", 3)
## [,1] [,2] [,3]
## [1,] "apples" "oranges" "pears and bananas"
## [2,] "pineapples" "mangos" "guavas"
# 仅从字符串中提取单个片段
str_split_i(fruits, " and ", 1)
## [1] "apples" "pineapples"
str_split_i(fruits, " and ", 4)
## [1] "bananas" NA
# 使用负数从末尾选择
str_split_i(fruits, " and ", -1)
## [1] "bananas" "guavas"
❀ str_replace:字符串替换
str_replace(string, pattern, replacement)
str_replace_all(string, pattern, replacement)
fruits <- c("one apple", "two pears", "three bananas")
str_replace(fruits, "[aeiou]", "-")
## [1] "-ne apple" "tw- pears" "thr-e bananas"
str_replace_all(fruits, "[aeiou]", "-")
## [1] "-n- -ppl-" "tw- p--rs" "thr-- b-n-n-s"
str_replace(fruits, "([aeiou])", "\\1\\1") # \\1 引用第一个捕获分组匹配到的内容
## [1] "oone apple" "twoo pears" "threee bananas"
# str_replace() 对字符串、模式和替换内容这三个参数都是向量化的
str_replace(fruits, "[aeiou]", c("1", "2", "3"))
## [1] "1ne apple" "tw2 pears" "thr3e bananas"
# 将多个模式和替换应用于同一字符串
fruits %>%
str_c(collapse = "---") %>%
str_replace_all(c("one" = "1", "two" = "2", "three" = "3"))
## [1] "1 apple---2 pears---3 bananas"
参考:
网友评论