1 、连接字符串:str_c()
str_c(..., sep = "", collapse = NULL)
sep:连接两个字符之间插入的符号
collapse:若不为 NULL,则用该字符作为分隔符,把连接后的结果向量合并成一个字符串
示例
> str_c("Letter: ", letters)
[1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e" "Letter: f" "Letter: g" "Letter: h"
[9] "Letter: i" "Letter: j" "Letter: k" "Letter: l" "Letter: m" "Letter: n" "Letter: o" "Letter: p"
[17] "Letter: q" "Letter: r" "Letter: s" "Letter: t" "Letter: u" "Letter: v" "Letter: w" "Letter: x"
[25] "Letter: y" "Letter: z"
> str_c("Letter", letters, sep = ": ")
[1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e" "Letter: f" "Letter: g" "Letter: h"
[9] "Letter: i" "Letter: j" "Letter: k" "Letter: l" "Letter: m" "Letter: n" "Letter: o" "Letter: p"
[17] "Letter: q" "Letter: r" "Letter: s" "Letter: t" "Letter: u" "Letter: v" "Letter: w" "Letter: x"
[25] "Letter: y" "Letter: z"
> str_c(letters, " is for", "...")
[1] "a is for..." "b is for..." "c is for..." "d is for..." "e is for..." "f is for..." "g is for..."
[8] "h is for..." "i is for..." "j is for..." "k is for..." "l is for..." "m is for..." "n is for..."
[15] "o is for..." "p is for..." "q is for..." "r is for..." "s is for..." "t is for..." "u is for..."
[22] "v is for..." "w is for..." "x is for..." "y is for..." "z is for..."
> str_c(letters[-26], " comes before ", letters[-1])
[1] "a comes before b" "b comes before c" "c comes before d" "d comes before e" "e comes before f"
[6] "f comes before g" "g comes before h" "h comes before i" "i comes before j" "j comes before k"
[11] "k comes before l" "l comes before m" "m comes before n" "n comes before o" "o comes before p"
[16] "p comes before q" "q comes before r" "r comes before s" "s comes before t" "t comes before u"
[21] "u comes before v" "v comes before w" "w comes before x" "x comes before y" "y comes before z"
> str_c(letters, collapse = "")
[1] "abcdefghijklmnopqrstuvwxyz"
> str_c(letters, collapse = ", ")
[1] "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z"
> str_c(c("a", NA, "b"), "-d")
[1] "a-d" NA    "b-d"
2 、字符串计数:str_count()
str_count(string, pattern = "")
string:字符串
pattern:对哪个字符进行计数
示例
> fruit <- c("apple", "banana", "pear", "pineapple")
> str_count(fruit, "a")
[1] 1 3 1 1
> str_count(fruit, "p")
[1] 2 0 1 3
> str_count(fruit, "e")
[1] 1 0 1 2
> str_count(fruit, c("a", "b", "p", "p"))
[1] 1 1 1 3
> str_count(c("a.", "...", ".a.a"), ".")
[1] 2 3 4
> str_count(c("a.", "...", ".a.a"), fixed("."))
[1] 1 3 2
3 、字符串逻辑判断:str_detect()
str_detect(string, pattern = "")
string:字符串
pattern:对哪个字符进行逻辑判断
示例
> fruit <- c("apple", "banana", "pear", "pineapple")
> str_detect(fruit, "a")
[1] TRUE TRUE TRUE TRUE
> str_detect(fruit, "^a")
[1] TRUE FALSE FALSE FALSE
> str_detect(fruit, "a$")
[1] FALSE TRUE FALSE FALSE
> str_detect(fruit, "b")
[1] FALSE TRUE FALSE FALSE
> str_detect(fruit, "[aeiou]")
[1] TRUE TRUE TRUE TRUE
> # Also vectorised over pattern
> str_detect("aecfg", letters)
[1] TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[17] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
4 、复制字符串:str_dup()
str_dup(string, times)
string:字符串
times:复制的次数
示例
> fruit <- c("apple", "pear", "banana")
> str_dup(fruit, 2)
[1] "appleapple" "pearpear" "bananabanana"
> str_dup(fruit, 1:3)
[1] "apple" "pearpear" "bananabananabanana"
> str_c("ba", str_dup("na", 0:5))
[1] "ba" "bana" "banana" "bananana" "banananana" "bananananana"
5 、从字符串中提取匹配字符:str_extract()
str_extract(string, pattern)
str_extract_all(string, pattern, simplify = FALSE)
string:字符串
pattern:匹配模式,默认按正则表达式解释;如需按普通字符串匹配,可用 fixed() 包裹
示例
> shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
> str_extract(shopping_list, "\\d")
[1] "4" NA NA "2"
> str_extract(shopping_list, "[a-z]+")
[1] "apples" "bag" "bag" "milk"
> str_extract(shopping_list, "[a-z]{1,4}")
[1] "appl" "bag" "bag" "milk"
> str_extract(shopping_list, "\\b[a-z]{1,4}\\b")
[1] NA "bag" "bag" "milk"
> str_extract_all(shopping_list, "[a-z]+")
[[1]]
[1] "apples" "x"
[[2]]
[1] "bag" "of" "flour"
[[3]]
[1] "bag" "of" "sugar"
[[4]]
[1] "milk" "x"
> str_extract_all(shopping_list, "\\b[a-z]+\\b")
[[1]]
[1] "apples"
[[2]]
[1] "bag" "of" "flour"
[[3]]
[1] "bag" "of" "sugar"
[[4]]
[1] "milk"
> str_extract_all(shopping_list, "\\d")
[[1]]
[1] "4"
[[2]]
character(0)
[[3]]
character(0)
[[4]]
[1] "2"
6 、字符串的长度:str_length()
str_length(string)
string:字符串
示例
> str_length(letters)
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
> str_length(NA)
[1] NA
> str_length(factor("abc"))
[1] 3
> str_length(c("i", "like", "programming", NA))
[1] 1 4 11 NA
> # Two ways of representing a u with an umlaut
> u1 <- "\u00fc"
> u2 <- stringi::stri_trans_nfd(u1)
> # They print the same:
> u1
[1] "ü"
> u2
[1] "ü"
> # But have a different length
> str_length(u1)
[1] 1
> str_length(u2)
[1] 2
7 、找到匹配的字符串的位置:str_locate()
str_locate(string, pattern)
str_locate_all(string, pattern)
string:字符串
pattern:匹配模式,默认按正则表达式解释;如需按普通字符串匹配,可用 fixed() 包裹
示例
> fruit <- c("apple", "banana", "pear", "pineapple")
> str_locate(fruit, "$")
start end
[1,] 6 5
[2,] 7 6
[3,] 5 4
[4,] 10 9
> str_locate(fruit, "a")
start end
[1,] 1 1
[2,] 2 2
[3,] 3 3
[4,] 5 5
> str_locate(fruit, "e")
start end
[1,] 5 5
[2,] NA NA
[3,] 2 2
[4,] 4 4
> str_locate(fruit, c("a", "b", "p", "p"))
start end
[1,] 1 1
[2,] 1 1
[3,] 1 1
[4,] 1 1
> str_locate_all(fruit, "a")
[[1]]
start end
[1,] 1 1
[[2]]
start end
[1,] 2 2
[2,] 4 4
[3,] 6 6
[[3]]
start end
[1,] 3 3
[[4]]
start end
[1,] 5 5
> str_locate_all(fruit, "e")
[[1]]
start end
[1,] 5 5
[[2]]
start end
[[3]]
start end
[1,] 2 2
[[4]]
start end
[1,] 4 4
[2,] 9 9
> str_locate_all(fruit, "")
[[1]]
start end
[1,] 1 0
[2,] 2 1
[3,] 3 2
[4,] 4 3
[5,] 5 4
[[2]]
start end
[1,] 1 0
[2,] 2 1
[3,] 3 2
[4,] 4 3
[5,] 5 4
[6,] 6 5
[[3]]
start end
[1,] 1 0
[2,] 2 1
[3,] 3 2
[4,] 4 3
[[4]]
start end
[1,] 1 0
[2,] 2 1
[3,] 3 2
[4,] 4 3
[5,] 5 4
[6,] 6 5
[7,] 7 6
[8,] 8 7
[9,] 9 8
8 、从字符串中提取匹配组:str_match()
str_match(string, pattern)
str_match_all(string, pattern)
string:字符串
pattern:匹配模式,默认按正则表达式解释;如需按普通字符串匹配,可用 fixed() 包裹
示例
> strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569",
+ "387 287 6718", "apple", "233.398.9187 ", "482 952 3315",
+ "239 923 8115 and 842 566 4692", "Work: 579-499-7527", "$1000",
+ "Home: 543.355.3679")
> phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
> str_extract(strings, phone)
[1] "219 733 8965" "329-293-8753" NA "595 794 7569" "387 287 6718" NA
[7] "233.398.9187" "482 952 3315" "239 923 8115" "579-499-7527" NA "543.355.3679"
> str_match(strings, phone)
[,1] [,2] [,3] [,4]
[1,] "219 733 8965" "219" "733" "8965"
[2,] "329-293-8753" "329" "293" "8753"
[3,] NA NA NA NA
[4,] "595 794 7569" "595" "794" "7569"
[5,] "387 287 6718" "387" "287" "6718"
[6,] NA NA NA NA
[7,] "233.398.9187" "233" "398" "9187"
[8,] "482 952 3315" "482" "952" "3315"
[9,] "239 923 8115" "239" "923" "8115"
[10,] "579-499-7527" "579" "499" "7527"
[11,] NA NA NA NA
[12,] "543.355.3679" "543" "355" "3679"
> str_extract_all(strings, phone)
[[1]]
[1] "219 733 8965"
[[2]]
[1] "329-293-8753"
[[3]]
character(0)
[[4]]
[1] "595 794 7569"
[[5]]
[1] "387 287 6718"
[[6]]
character(0)
[[7]]
[1] "233.398.9187"
[[8]]
[1] "482 952 3315"
[[9]]
[1] "239 923 8115" "842 566 4692"
[[10]]
[1] "579-499-7527"
[[11]]
character(0)
[[12]]
[1] "543.355.3679"
> str_match_all(strings, phone)
[[1]]
[,1] [,2] [,3] [,4]
[1,] "219 733 8965" "219" "733" "8965"
[[2]]
[,1] [,2] [,3] [,4]
[1,] "329-293-8753" "329" "293" "8753"
[[3]]
character(0)
[[4]]
[,1] [,2] [,3] [,4]
[1,] "595 794 7569" "595" "794" "7569"
[[5]]
[,1] [,2] [,3] [,4]
[1,] "387 287 6718" "387" "287" "6718"
[[6]]
character(0)
[[7]]
[,1] [,2] [,3] [,4]
[1,] "233.398.9187" "233" "398" "9187"
[[8]]
[,1] [,2] [,3] [,4]
[1,] "482 952 3315" "482" "952" "3315"
[[9]]
[,1] [,2] [,3] [,4]
[1,] "239 923 8115" "239" "923" "8115"
[2,] "842 566 4692" "842" "566" "4692"
[[10]]
[,1] [,2] [,3] [,4]
[1,] "579-499-7527" "579" "499" "7527"
[[11]]
character(0)
[[12]]
[,1] [,2] [,3] [,4]
[1,] "543.355.3679" "543" "355" "3679"
> x <- c("<a> <b>", "<a> <>", "<a>", "", NA)
> str_match(x, "<(.*?)> <(.*?)>")
[,1] [,2] [,3]
[1,] "<a> <b>" "a" "b"
[2,] "<a> <>" "a" ""
[3,] NA NA NA
[4,] NA NA NA
[5,] NA NA NA
> str_match_all(x, "<(.*?)>")
[[1]]
[,1] [,2]
[1,] "<a>" "a"
[2,] "<b>" "b"
[[2]]
[,1] [,2]
[1,] "<a>" "a"
[2,] "<>" ""
[[3]]
[,1] [,2]
[1,] "<a>" "a"
[[4]]
character(0)
[[5]]
character(0)
> str_extract(x, "<.*?>")
[1] "<a>" "<a>" "<a>" NA NA
> str_extract_all(x, "<.*?>")
[[1]]
[1] "<a>" "<b>"
[[2]]
[1] "<a>" "<>"
[[3]]
[1] "<a>"
[[4]]
character(0)
[[5]]
character(0)
9 、字符串增加空字符:str_pad()
str_pad(string, width, side = c("left", "right", "both"), pad = " ")
string:字符向量
width:填充后字符串的最小宽度
side:在左边、右边还是两边填充
pad:用于填充的单个字符,默认为空格
示例
> str_pad("conan", 20, "left")
[1] "               conan"
> # 从右边补充空格,直到字符串长度为20
> str_pad("conan", 20, "right")
[1] "conan               "
> # 从左右两边各补充空格,直到字符串长度为20
> str_pad("conan", 20, "both")
[1] "       conan        "
> # 从左右两边各补充x字符,直到字符串长度为20
> str_pad("conan", 20, "both",'x')
[1] "xxxxxxxconanxxxxxxxx"
10 、替换字符串:str_replace()
str_replace(string, pattern, replacement)
str_replace_all(string, pattern, replacement)
string:字符向量
pattern:匹配模式,默认按正则表达式解释;如需按普通字符串匹配,可用 fixed() 包裹
示例
> fruits <- c("one apple", "two pears", "three bananas")
> str_replace(fruits, "[aeiou]", "-")
[1] "-ne apple" "tw- pears" "thr-e bananas"
> str_replace_all(fruits, "[aeiou]", "-")
[1] "-n- -ppl-" "tw- p--rs" "thr-- b-n-n-s"
> str_replace(fruits, "([aeiou])", "")
[1] "ne apple" "tw pears" "thre bananas"
> str_replace(fruits, "([aeiou])", "\\1\\1")
[1] "oone apple" "twoo pears" "threee bananas"
> str_replace(fruits, "[aeiou]", c("1", "2", "3"))
[1] "1ne apple" "tw2 pears" "thr3e bananas"
> str_replace(fruits, c("a", "e", "i"), "-")
[1] "one -pple" "two p-ars" "three bananas"
> fruits <- c("one apple", "two pears", "three bananas")
> str_replace(fruits, "[aeiou]", "-")
[1] "-ne apple" "tw- pears" "thr-e bananas"
> str_replace_all(fruits, "[aeiou]", "-")
[1] "-n- -ppl-" "tw- p--rs" "thr-- b-n-n-s"
> str_replace_all(fruits, "([aeiou])", "")
[1] "n ppl" "tw prs" "thr bnns"
> str_replace_all(fruits, "([aeiou])", "\\1\\1")
[1] "oonee aapplee" "twoo peeaars" "threeee baanaanaas"
> str_replace_all(fruits, "[aeiou]", c("1", "2", "3"))
[1] "1n1 1ppl1" "tw2 p22rs" "thr33 b3n3n3s"
> str_replace_all(fruits, c("a", "e", "i"), "-")
[1] "one -pple" "two p-ars" "three bananas"
11 、分割字符串:str_split()
str_split(string, pattern, n = Inf, simplify = FALSE)
str_split_fixed(string, pattern, n)
string:字符向量
pattern:匹配模式,默认按正则表达式解释;如需按普通字符串匹配,可用 fixed() 包裹
simplify: 如果值为FALSE,返回由字符串向量组成的列表;如果值为TRUE,返回字符串矩阵
示例
> fruits <- c(
+ "apples and oranges and pears and bananas",
+ "pineapples and mangos and guavas"
+ )
> str_split(fruits, " and ")
[[1]]
[1] "apples" "oranges" "pears" "bananas"
[[2]]
[1] "pineapples" "mangos" "guavas"
> str_split(fruits, " and ", n = 3)
[[1]]
[1] "apples" "oranges" "pears and bananas"
[[2]]
[1] "pineapples" "mangos" "guavas"
> str_split(fruits, " and ", n = 2)
[[1]]
[1] "apples" "oranges and pears and bananas"
[[2]]
[1] "pineapples" "mangos and guavas"
> # If n greater than number of pieces, no padding occurs
> str_split(fruits, " and ", n = 5)
[[1]]
[1] "apples" "oranges" "pears" "bananas"
[[2]]
[1] "pineapples" "mangos" "guavas"
> # Use fixed to return a character matrix
> str_split_fixed(fruits, " and ", 3)
[,1] [,2] [,3]
[1,] "apples" "oranges" "pears and bananas"
[2,] "pineapples" "mangos" "guavas"
> str_split_fixed(fruits, " and ", 4)
[,1] [,2] [,3] [,4]
[1,] "apples" "oranges" "pears" "bananas"
[2,] "pineapples" "mangos" "guavas" ""
12 、截取字符串:str_sub()
str_sub(string, start = 1L, end = -1L)
str_sub(string, start = 1L, end = -1L) <- value
string:字符向量
start:开始位置
end: 结束位置
示例
> hw <- "Hadley Wickham"
> str_sub(hw, 1, 6)
[1] "Hadley"
> str_sub(hw, end = 6)
[1] "Hadley"
> str_sub(hw, 8, 14)
[1] "Wickham"
> str_sub(hw, 8)
[1] "Wickham"
> str_sub(hw, c(1, 8), c(6, 14))
[1] "Hadley" "Wickham"
> str_sub(hw, -1)
[1] "m"
> str_sub(hw, -7)
[1] "Wickham"
> str_sub(hw, end = -7)
[1] "Hadley W"
> # Alternatively, you can pass in a two column matrix, as in the
> # output from str_locate_all
> pos <- str_locate_all(hw, "[aeio]")[[1]]
> str_sub(hw, pos)
[1] "adley Wickham" "ey Wickham" "ickham" "am" "adley Wickham" "ey Wickham"
[7] "ickham" "am"
> str_sub(hw, pos[, 1], pos[, 2])
[1] "a" "e" "i" "a"
> # Vectorisation
> str_sub(hw, seq_len(str_length(hw)))
[1] "Hadley Wickham" "adley Wickham" "dley Wickham" "ley Wickham" "ey Wickham"
[6] "y Wickham" " Wickham" "Wickham" "ickham" "ckham"
[11] "kham" "ham" "am" "m"
> str_sub(hw, end = seq_len(str_length(hw)))
[1] "H" "Ha" "Had" "Hadl" "Hadle"
[6] "Hadley" "Hadley " "Hadley W" "Hadley Wi" "Hadley Wic"
[11] "Hadley Wick" "Hadley Wickh" "Hadley Wickha" "Hadley Wickham"
> # Replacement form
> x <- "BBCDEF"
> str_sub(x, 1, 1) <- "A"; x
[1] "ABCDEF"
> str_sub(x, -1, -1) <- "K"; x
[1] "ABCDEK"
> str_sub(x, -2, -2) <- "GHIJ"; x
[1] "ABCDGHIJK"
> str_sub(x, 2, -2) <- ""; x
[1] "AK"
13 、去除字符串首尾的空白字符:str_trim()
str_trim(string, side = c("both", "left", "right"))
string:字符向量
side:删除左边,右边,两边的空字符
示例
> str_trim(" String with trailing and leading white space\t")
[1] "String with trailing and leading white space"
> str_trim("\n\nString with trailing and leading white space\n\n")
[1] "String with trailing and leading white space"
网友评论