# Add a new column on top of the existing data: mutate() keeps the
# selected columns and appends sleep_total_min (sleep_total times 60).
msleep %>%
  select(name, sleep_total) %>%
  mutate(sleep_total_min = sleep_total * 60) %>%
  head()
## # A tibble: 6 x 3
## name sleep_total sleep_total_min
## <chr> <dbl> <dbl>
## 1 Cheetah 12.1 726
## 2 Owl monkey 17 1020
## 3 Mountain beaver 14.4 864
## 4 Greater short-tailed shrew 14.9 894
## 5 Cow 4 240
## 6 Three-toed sloth 14.4 864
# Create the new column and a new data frame at the same time:
# transmute() keeps only the columns created in the call, so the result
# contains sleep_total_min alone.
msleep %>%
  select(name, sleep_total) %>%
  transmute(sleep_total_min = sleep_total * 60) %>%
  head()
## # A tibble: 6 x 1
## sleep_total_min
## <dbl>
## 1 726
## 2 1020
## 3 864
## 4 894
## 5 240
## 6 864
mutate()在使用的时候,一些聚合函数,例如mean()或者sum(),默认是对整列进行计算而不是逐行计算,因此得到的结果会不符合预期。我们需要自己来写运算公式,例如mutate(average = (sleep_rem + sleep_cycle) / 2),或者先使用rowwise()函数
# Without rowwise(): mean() receives the whole sleep_rem and sleep_cycle
# columns at once, so every row gets the same overall value (NA here,
# because the columns contain missing values and na.rm is not set).
Notwise <- msleep %>%
  select(name, contains("sleep")) %>%
  transmute(avg = mean(c(sleep_rem, sleep_cycle))) %>%
  pull(avg)

# With rowwise(): mean() is evaluated once per row, giving a per-row
# average of sleep_rem and sleep_cycle (still NA when either is missing).
Usewise <- msleep %>%
  select(name, contains("sleep")) %>%
  rowwise() %>%
  transmute(avg = mean(c(sleep_rem, sleep_cycle))) %>%
  pull(avg)

# Put both result vectors side by side to compare them.
data.frame(Notwise = Notwise, Usewise = Usewise) %>%
  head()
## Notwise Usewise
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA 1.2166667
## 5 NA 0.6833333
## 6 NA 1.4833333
- mutate_all() 将会对所有的列进行相同的处理
- mutate_if() 先进行逻辑判断,符合条件的进行相同的处理
- mutate_at() 对在vars()函数当中的列进行相同的处理
来进行指定的。例如我需要对所有的数据都添加一个回车。这个时候mutate_all()当中写~paste(., " /n ")
或者funs(paste(., " /n "))
# mutate_all() applies the same function to every column; here the first
# three columns are converted to lower case.
msleep %>%
  select(1:3) %>%
  mutate_all(tolower) %>%
  head()
## # A tibble: 6 x 3
## name genus vore
## <chr> <chr> <chr>
## 1 cheetah acinonyx carni
## 2 owl monkey aotus omni
## 3 mountain beaver aplodontia herbi
## 4 greater short-tailed shrew blarina omni
## 5 cow bos herbi
## 6 three-toed sloth bradypus herbi
# mutate_if() transforms only the columns for which is.numeric() is TRUE:
# each one is rounded to 3 decimals and formatted with at least 3 digits
# after the decimal point, which turns it into a character column.
msleep %>%
  select(name, sleep_total:bodywt) %>%
  mutate_if(
    is.numeric,
    function(x) format(round(x, 3), nsmall = 3)
  )
## # A tibble: 83 x 7
## name sleep_total sleep_rem sleep_cycle awake brainwt bodywt
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Cheetah "12.100" " NA" " NA" "11.90… " NA" " 50.0…
## 2 Owl monkey "17.000" "1.800" " NA" " 7.00… "0.016" " 0.4…
## 3 Mountain beaver "14.400" "2.400" " NA" " 9.60… " NA" " 1.3…
## 4 Greater short-tai… "14.900" "2.300" "0.133" " 9.10… "0.000" " 0.0…
## 5 Cow " 4.000" "0.700" "0.667" "20.00… "0.423" " 600.0…
## 6 Three-toed sloth "14.400" "2.200" "0.767" " 9.60… " NA" " 3.8…
## 7 Northern fur seal " 8.700" "1.400" "0.383" "15.30… " NA" " 20.4…
## 8 Vesper mouse " 7.000" " NA" " NA" "17.00… " NA" " 0.0…
## 9 Dog "10.100" "2.900" "0.333" "13.90… "0.070" " 14.0…
## 10 Roe deer " 3.000" " NA" " NA" "21.00… "0.098" " 14.8…
## # … with 73 more rows
# mutate_at() transforms only the columns picked inside vars(): every
# column whose name contains "sleep" is multiplied by 60 in place, while
# awake is left untouched.
msleep %>%
  select(name, sleep_total:awake) %>%
  mutate_at(
    vars(contains("sleep")),
    function(x) x * 60
  )
## # A tibble: 83 x 5
## name sleep_total sleep_rem sleep_cycle awake
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheetah 726 NA NA 11.9
## 2 Owl monkey 1020 108 NA 7
## 3 Mountain beaver 864 144 NA 9.6
## 4 Greater short-tailed shrew 894 138 8.00 9.1
## 5 Cow 240 42 40.0 20
## 6 Three-toed sloth 864 132 46.0 9.6
## 7 Northern fur seal 522 84 23.0 15.3
## 8 Vesper mouse 420 NA NA 17
## 9 Dog 606 174 20.0 13.9
## 10 Roe deer 180 NA NA 21
## # … with 73 more rows
# Multiply the "sleep" columns by 60 in place, then rename those same
# columns by appending "_min" so the names reflect the new values.
msleep %>%
  select(name, sleep_total:awake) %>%
  mutate_at(
    vars(contains("sleep")),
    function(x) x * 60
  ) %>%
  rename_at(
    vars(contains("sleep")),
    function(nm) paste0(nm, "_min")
  )
## # A tibble: 83 x 5
## name sleep_total_min sleep_rem_min sleep_cycle_min awake
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheetah 726 NA NA 11.9
## 2 Owl monkey 1020 108 NA 7
## 3 Mountain beaver 864 144 NA 9.6
## 4 Greater short-tailed shr… 894 138 8.00 9.1
## 5 Cow 240 42 40.0 20
## 6 Three-toed sloth 864 132 46.0 9.6
## 7 Northern fur seal 522 84 23.0 15.3
## 8 Vesper mouse 420 NA NA 17
## 9 Dog 606 174 20.0 13.9
## 10 Roe deer 180 NA NA 21
## # … with 73 more rows
# Naming the function inside funs() ("min") makes mutate_at() add new
# columns with that suffix (sleep_total_min, sleep_rem_min) and keep the
# originals. funs() is soft deprecated as of dplyr 0.8.0 (see the warning
# echoed below), which recommends a list of functions or lambdas instead.
msleep %>%
  select(name, sleep_total, sleep_rem) %>%
  mutate_at(
    vars(contains("sleep")),
    funs(min = . * 60)
  )
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
## # Simple named list:
## list(mean = mean, median = median)
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
## # A tibble: 83 x 5
## name sleep_total sleep_rem sleep_total_min sleep_rem_min
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheetah 12.1 NA 726 NA
## 2 Owl monkey 17 1.8 1020 108
## 3 Mountain beaver 14.4 2.4 864 144
## 4 Greater short-tailed shr… 14.9 2.3 894 138
## 5 Cow 4 0.7 240 42
## 6 Three-toed sloth 14.4 2.2 864 132
## 7 Northern fur seal 8.7 1.4 522 84
## 8 Vesper mouse 7 NA 420 NA
## 9 Dog 10.1 2.9 606 174
## 10 Roe deer 3 NA 180 NA
## # … with 73 more rows
# Look at the distinct values of the original variable before recoding.
unique(msleep$conservation)
## [1] "lc" NA "nt" "domesticated" "vu"
## [6] "en" "cd"
# recode(): map old values of conservation to new labels in a new
# character column. "en" becomes "Endangered"; "lc" and "domesticated"
# both become "Least_Concern"; every other non-missing value falls back
# to .default ("other"); missing values stay NA.
# count() then tallies the rows per recoded value.
msleep %>%
  mutate(conservation2 = recode(conservation,
    "en" = "Endangered",
    "lc" = "Least_Concern",
    "domesticated" = "Least_Concern",
    .default = "other"
  )) %>%
  count(conservation2)
## # A tibble: 4 x 2
## conservation2 n
## <chr> <int>
## 1 Endangered 4
## 2 Least_Concern 37
## 3 other 13
## 4 <NA> 29
# recode_factor(): same idea as recode(), but the result is a factor whose
# levels follow the order of the replacements given here. .missing gives
# NA values their own label and .ordered = TRUE makes the factor ordered.
# count() then tallies the rows per level.
msleep %>%
  mutate(conservation2 = recode_factor(conservation,
    "lc" = "Least_Concern",
    "domesticated" = "Least_Concern",
    "en" = "Endangered",
    .default = "other",
    .missing = "no data",
    .ordered = TRUE
  )) %>%
  count(conservation2)
## # A tibble: 4 x 2
## conservation2 n
## <ord> <int>
## 1 Least_Concern 37
## 2 Endangered 4
## 3 other 13
## 4 no data 29
# Two-way split with if_else(): rows with sleep_total above 10 are
# labelled "long", all others "short".
msleep %>%
  select(name, sleep_total) %>%
  mutate(
    sleep_time = if_else(
      condition = sleep_total > 10,
      true = "long",
      false = "short"
    )
  )
## # A tibble: 83 x 3
## name sleep_total sleep_time
## <chr> <dbl> <chr>
## 1 Cheetah 12.1 long
## 2 Owl monkey 17 long
## 3 Mountain beaver 14.4 long
## 4 Greater short-tailed shrew 14.9 long
## 5 Cow 4 short
## 6 Three-toed sloth 14.4 long
## 7 Northern fur seal 8.7 short
## 8 Vesper mouse 7 short
## 9 Dog 10.1 long
## 10 Roe deer 3 short
## # … with 73 more rows
case_when()当中,~的左边是逻辑判断式,右边则是判断式为真的时候的结果。例如sleep_total > 13 ~ "very long"
代表sleep_total > 13
为真的时候,这个变量变为very long
case_when()的定义是按照顺序来的。只有第一个判断式是假的时候才会执行第二个。到了最后一个判断式的时候通过TRUE ~ XX来指定其余所有情况的结果
# Multi-way split with case_when(): conditions are tried from top to
# bottom and the first one that is TRUE decides the label; the final
# TRUE ~ "short" catches every row not matched above.
msleep %>%
  select(name, sleep_total) %>%
  mutate(
    sleep_total_discr = case_when(
      sleep_total > 13 ~ "very long",
      sleep_total > 10 ~ "long",
      sleep_total > 7 ~ "limited",
      TRUE ~ "short"
    )
  )
## # A tibble: 83 x 3
## name sleep_total sleep_total_discr
## <chr> <dbl> <chr>
## 1 Cheetah 12.1 long
## 2 Owl monkey 17 very long
## 3 Mountain beaver 14.4 very long
## 4 Greater short-tailed shrew 14.9 very long
## 5 Cow 4 short
## 6 Three-toed sloth 14.4 very long
## 7 Northern fur seal 8.7 limited
## 8 Vesper mouse 7 short
## 9 Dog 10.1 long
## 10 Roe deer 3 short
## # … with 73 more rows
# case_when() conditions may refer to different columns. They are still
# tried in order, so a row is labelled by the first condition that is
# TRUE and TRUE ~ "other" catches the rest.
# count() then tallies the rows per group.
msleep %>%
  mutate(silly_groups = case_when(
    brainwt < 0.001 ~ "light_headed",
    sleep_total > 10 ~ "lazy_sleeper",
    is.na(sleep_rem) ~ "absent_rem",
    TRUE ~ "other"
  )) %>%
  count(silly_groups)
## # A tibble: 4 x 2
## silly_groups n
## <chr> <int>
## 1 absent_rem 8
## 2 lazy_sleeper 39
## 3 light_headed 6
## 4 other 30
# Turn one specific value into NA: the echoed output below shows vore as
# NA for animals that are "omni" in the raw data, so "omni" is replaced
# by NA with na_if().
msleep %>%
  select(name:order) %>%
  mutate(vore = na_if(vore, "omni"))
## # A tibble: 83 x 4
## name genus vore order
## <chr> <chr> <chr> <chr>
## 1 Cheetah Acinonyx carni Carnivora
## 2 Owl monkey Aotus <NA> Primates
## 3 Mountain beaver Aplodontia herbi Rodentia
## 4 Greater short-tailed shrew Blarina <NA> Soricomorpha
## 5 Cow Bos herbi Artiodactyla
## 6 Three-toed sloth Bradypus herbi Pilosa
## 7 Northern fur seal Callorhinus carni Carnivora
## 8 Vesper mouse Calomys <NA> Rodentia
## 9 Dog Canis carni Carnivora
## 10 Roe deer Capreolus herbi Artiodactyla
## # … with 73 more rows