美文网首页R语言R
stringr包字符串处理

stringr包字符串处理

作者: 小梦游仙境 | 来源:发表于2019-08-13 22:04 被阅读0次

    这是stringr包中常用的字符串处理函数,字符串处理好了,简直是利器在手。

    str_length获得字符串长度
    > b<-c('abc','456','aini')
    > str_length(b)
    [1] 3 3 4
    > length(b)
    [1] 3
    
    str_sub截断字符串

    使用str_sub()访问单个字符。它有三个参数:字符向量、起始位置和结束位置。每个位置既可以是正整数(从左边开始计数),也可以是负整数(从右边开始计数)。

    x <- c("abcdef", "ghifjk")
    str_sub(x, 3, 3)
    #> [1] "c" "i"
    str_sub(x, 2, -2)
    #> [1] "bcde" "hifj"
    
    str_sub可以修改字符
    str_sub(x, 3, 3) <- "X"
    x
    #> [1] "abXdef" "ghXfjk"
    
    str_pad通过填充空格将字符串补齐到指定长度
    x <- c("abc", "defghi")
    str_pad(x, 10)
    #> [1] "       abc" "    defghi"
    str_pad(x, 10, "both")
    #> [1] "   abc    " "  defghi  "
    #str_pad不会将字符长度缩短
    str_pad(x, 4)
    #> [1] " abc"   "defghi"
    x <- c("Short", "This is a long string")
    x %>% 
      str_trunc(10) %>% 
      str_pad(10, "right")
    #> [1] "Short     " "This is..."
    
    str_trim与str_pad相反,删除空格
    x <- c("  a   ", "b   ",  "   c")
    str_trim(x)
    #> [1] "a" "b" "c"
    str_trim(x, "left")
    #> [1] "a   " "b   " "c"
    
    str_wrap对一段文字进行自动换行,使每一行的长度尽可能相近。
    jabberwocky <- str_c(
      "`Twas brillig, and the slithy toves ",
      "did gyre and gimble in the wabe: ",
      "All mimsy were the borogoves, ",
      "and the mome raths outgrabe. "
    )
    cat(str_wrap(jabberwocky, width = 40))
    #> `Twas brillig, and the slithy toves did
    #> gyre and gimble in the wabe: All mimsy
    #> were the borogoves, and the mome raths
    #> outgrabe.
    
    str_to_upper、str_to_lower大小写和str_to_title首字母
    x <- "I like horses."
    str_to_upper(x)
    #> [1] "I LIKE HORSES."
    str_to_title(x)
    #> [1] "I Like Horses."
    
    str_to_lower(x)
    #> [1] "i like horses."
    # Turkish has two sorts of i: with and without the dot
    str_to_lower(x, "tr")
    #> [1] "ı like horses."
    
    str_order()和str_sort()对字符向量排序

    str_order()和str_sort()的区别在于:前者返回排序后的索引(下标),后者返回排序后的实际值。

    x <- c("y", "i", "k")
    str_order(x)
    #> [1] 2 3 1
    str_sort(x)
    #> [1] "i" "k" "y"
    # In Lithuanian, y comes between i and k
    str_sort(x, locale = "lt")
    #> [1] "i" "y" "k"
    
    str_detect()和str_subset()检测字符串中是否存在某种匹配模式

    str_detect()检测模式是否存在,并返回逻辑向量(类似于grepl())。str_subset()返回字符向量中与正则表达式匹配的元素(类似于带value = TRUE参数的grep())。

    fruit <- c("apple", "banana", "pear", "pinapple")
    str_detect(fruit, "a")
    [1] TRUE TRUE TRUE TRUE
    str_detect(fruit, "^a")
    [1]  TRUE FALSE FALSE FALSE
    str_detect("aecfg", letters)
    [1]  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
    [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
    [23] FALSE FALSE FALSE FALSE
    > str_subset(fruit, "a")
    [1] "apple"    "banana"   "pear"     "pinapple"
    > str_subset(fruit, "a$")
    [1] "banana"
    
    str_count计算匹配数
    fruit <- c("apple", "banana", "pear", "pineapple")
    str_count(fruit, "a")
    ## [1] 1 3 1 1
    str_count(fruit, "p")
    ## [1] 2 0 1 3
    str_count(fruit, c("a", "b", "p", "p"))
    ## [1] 1 1 1 3
    
    str_locate和str_locate_all()定位

    str_locate()定位模式第一次出现的位置,并返回一个包含start和end两列的数值矩阵。str_locate_all()查找所有匹配项,返回一个由数值矩阵组成的列表。它们分别类似于regexpr()和gregexpr()。

    > x <- c("abcdef", "ghifjk")
    > str_locate(x, "def")
         start end
    [1,]     4   6
    [2,]    NA  NA
    > str_locate(x, "fjk")
         start end
    [1,]    NA  NA
    [2,]     4   6
    str_locate_all(c("abcdefabc", "ghifjkabc"), "abc")
    [[1]]
         start end
    [1,]     1   3
    [2,]     7   9
    
    [[2]]
         start end
    [1,]     7   9
    
    str_extract和str_extract_all提取匹配字符串

    str_extract()提取与第一个匹配项对应的文本,返回一个字符向量。str_extract_all()提取所有匹配项并返回字符向量列表。

    shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
    str_extract(shopping_list, "\\d")
    ## [1] "4" NA  NA  "2"
    str_extract(shopping_list, "[a-z]+")
    ## [1] "apples" "bag"    "bag"    "milk"
    str_extract(shopping_list, "[a-z]{1,4}")
    ## [1] "appl" "bag"  "bag"  "milk"
    str_extract(shopping_list, "\\b[a-z]{1,4}\\b")
    ## [1] NA     "bag"  "bag"  "milk"
    > str_extract_all(shopping_list, "[a-z]+")
    [[1]]
    [1] "apples" "x"     
    
    [[2]]
    [1] "bag"   "of"    "flour"
    
    [[3]]
    [1] "bag"   "of"    "sugar"
    
    [[4]]
    [1] "milk" "x"
    
    str_replace和str_replace_all字符串替换

    str_replace()替换第一个匹配的模式并返回一个字符向量。str_replace_all()替换所有匹配项。它们分别类似于sub()和gsub()。

    > string<-str_replace('1989.03.24','\\.','-')
    > string
    [1] "1989-03.24"
    > string<-str_replace_all('1989.03.24','\\.','-')
    > string
    [1] "1989-03-24"
    
    str_split和str_split_fixed字符串分割

    str_split_fixed()根据模式将字符串分割成固定数量的片段,并返回一个字符矩阵。str_split()将字符串分割成可变数量的片段,并返回一个字符向量列表。

    str_split("a-b-c", "-")
    #> [[1]]
    #> [1] "a" "b" "c"
    str_split_fixed("a-b-c", "-", n = 2)
    #>      [,1] [,2] 
    #> [1,] "a"  "b-c"
    

    相关文章

      网友评论

        本文标题:stringr包字符串处理

        本文链接:https://www.haomeiwen.com/subject/zfuijctx.html