library(tidyverse)
# Build the demo data set: two columns taken from forcats::gss_cat.
# The workspace is deliberately NOT wiped with rm(list = ls()) and no global
# option is changed, so running this script leaves the caller's session alone.
# Both selected columns are already factors in gss_cat, so no string-to-factor
# conversion option is needed.
df <- forcats::gss_cat %>%
  select(rincome, denom)
# Preview the first three rows.
head(df, 3)
data:image/s3,"s3://crabby-images/7e4b2/7e4b20fea960ed35f15d5cb24709b5da67b7c748" alt=""
数据集情况
# Apply levels() to every column of df; returns a named list of level vectors.
df %>%
  lapply(levels)
data:image/s3,"s3://crabby-images/77168/7716802bc603c84158247628e87451b6a062061f" alt=""
查看每列的分类情况
# Count how many observations fall in each level, column by column.
df %>%
  lapply(fct_count)
data:image/s3,"s3://crabby-images/ae643/ae6437d4df14a7423356cbcff437fd5f25214494" alt=""
这个功能比较好,能看到个数分布情况
# Per-column level counts together with each level's share of the column.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
lapply(df, fct_count, prop = TRUE)
data:image/s3,"s3://crabby-images/0d0ce/0d0ceb3a7a4e03d519bd07b2b81b8567ff6cbf73" alt=""
这个功能更好了,可以直接看到每列中占比小于0.1的类别(水平)
# For every column of df, move the level "Don't know" to the very end
# (after = Inf). The result is a plain named list of factors.
df2 <- lapply(
  df,
  function(column) fct_relevel(column, "Don't know", after = Inf)
)
# Inspect the new level order: "Don't know" is now last in each column.
df2 %>%
  lapply(levels)
data:image/s3,"s3://crabby-images/bb01c/bb01c444ed4609b3f10ab9554a76b8340b292f16" alt=""
批量relevel感觉可能不太实用
# 3. Reorder levels by a property of the factor itself ----
# fct_inorder(): by order of first appearance
# fct_infreq():  by frequency of each level (most frequent first)
# fct_inseq():   by the numeric value of the level labels
#
# Example factor with values b, b, a, c, c, c.
f <- factor(rep(c("b", "a", "c"), times = c(2, 1, 3)))
f # levels default to alphabetical order: a, b, c
data:image/s3,"s3://crabby-images/93025/93025fddbd84972f833d9db2fbdb568873d1fb59" alt=""
默认按照字母顺序要牢记
# Order the levels by first appearance in the data.
f %>%
  fct_inorder()
data:image/s3,"s3://crabby-images/b9d00/b9d00cce9a84a5ae50fb46bcc419beb37f179baf" alt=""
第一次出现的顺序一般没啥意义,所以觉得不太实用
# Order the levels by how often they occur, most frequent first.
f %>%
  fct_infreq()
data:image/s3,"s3://crabby-images/598e2/598e22c2b45e1eabbaf32ca0a3ef06efc8877582" alt=""
一般经常以频率最高的为参考组,所以这个功能比较有用
# Values 1, 2, 3 with the levels deliberately declared in reverse: "3", "2", "1".
f <- factor(1:3, levels = as.character(3:1))
# fct_inseq() reorders the levels by their numeric value, regardless of the
# declared order above.
f %>%
  fct_inseq()
data:image/s3,"s3://crabby-images/d78c4/d78c449c150cf109b4897db4c37967d537c6c5fe" alt=""
按照从小到大的顺序排序
# Bar chart of hair colour counts in starwars, with missing hair colours
# removed; bars are drawn horizontally via coord_flip().
ggplot(
  filter(starwars, !is.na(hair_color)),
  aes(x = hair_color, fill = hair_color)
) +
  geom_bar() +
  coord_flip()
data:image/s3,"s3://crabby-images/b873f/b873fcacd378b2eed16b74cdcdb25e7e087bee45" alt=""
原始图,删除缺失是因为缺失会始终排在第一个
# Same bar chart, but the axis is ordered by frequency with fct_infreq().
# The fill aesthetic still maps the raw hair_color column.
ggplot(
  filter(starwars, !is.na(hair_color)),
  aes(x = fct_infreq(hair_color), fill = hair_color)
) +
  geom_bar() +
  coord_flip()
data:image/s3,"s3://crabby-images/f05cc/f05cc7bab7914eb8966974c050bf489a893ac772" alt=""
按照频率排序,看这里顺多了
# 4. fct_reorder(): reorder levels according to another column ----
# Build a small demo tibble, written column by column.
df <- tibble::tibble(
  color = c("blue", "green", "purple", "red", "yellow"),
  a = c(1, 6, 3, 2, 5),
  b = c(2, 2, 3, 3, 1)
)
data:image/s3,"s3://crabby-images/dec5b/dec5b10bcec89e2125057e77f2da46d385849df0" alt=""
模拟数据集
# Turn the character column into a factor, then show it with its levels.
df[["color"]] <- factor(df[["color"]])
df[["color"]]
data:image/s3,"s3://crabby-images/31f0d/31f0da4573b98a5246db0db179c8632f8459dc9d" alt=""
默认顺序
# Reorder the levels of `color` by column `a`, ascending. The summary function
# is min, applied to the `a` values belonging to each level. The printed
# factor shows that the levels of `color` have changed.
with(df, fct_reorder(color, a, .fun = min))
data:image/s3,"s3://crabby-images/e285d/e285d3bc4515896f9854f52775daaafaf3249e68" alt=""
按照a列的大小对颜色列进行排序
# Small plotting example for fct_reorder(), using the built-in iris data.
# 1) Species in its original level order.
boxplot(Sepal.Width ~ Species, data = iris)
# 2) Species levels reordered by Sepal.Width (fct_reorder's default summary).
boxplot(Sepal.Width ~ fct_reorder(Species, Sepal.Width), data = iris)
# 3) Same reordering, but descending (.desc = TRUE).
boxplot(Sepal.Width ~ fct_reorder(Species, Sepal.Width, .desc = TRUE), data = iris)
data:image/s3,"s3://crabby-images/4fc72/4fc72af084b7d0a92281c53724a7da3ec97869c3" alt=""
原始图像
data:image/s3,"s3://crabby-images/34e5c/34e5ce98e4fee3339440acd694fff71efd5243b4" alt=""
按照Y轴排序
data:image/s3,"s3://crabby-images/34207/34207d04975ee1eaf711b50b416e9aceed8a63e4" alt=""
降序排序
# Example data for fct_reorder2(): keep the rows of ChickWeight whose Chick
# level code is below 10, then randomly permute the Chick levels with
# fct_shuffle() so the level order carries no meaning.
chks <- ChickWeight %>%
  subset(as.integer(Chick) < 10) %>%
  transform(Chick = fct_shuffle(Chick))
chks
data:image/s3,"s3://crabby-images/59d58/59d58093365e2e8f314fd1962c59b9ca2fd91dd1" alt=""
数据集
# Weight over time, one coloured line per chick; the legend follows the
# (shuffled) level order of Chick.
ggplot(
  data = chks,
  mapping = aes(x = Time, y = weight, colour = Chick)
) +
  geom_point() +
  geom_line()
data:image/s3,"s3://crabby-images/c7ed2/c7ed29084314931239fbe01477a5843ac46c8b7f" alt=""
原始图片
# Make the legend order match the order of the lines: the colour factor is
# reordered with fct_reorder2(Chick, Time, weight). labs() restores the legend
# title, which would otherwise be the full fct_reorder2(...) expression.
ggplot(
  data = chks,
  mapping = aes(
    x = Time,
    y = weight,
    colour = fct_reorder2(Chick, Time, weight)
  )
) +
  geom_point() +
  geom_line() +
  labs(colour = "Chick")
data:image/s3,"s3://crabby-images/9c555/9c555551417d42b02b24e53dd8c8b5c274a0536c" alt=""
顺序一样了:fct_reorder2() 默认按每只鸡在 Time 最大处的 weight 从大到小排列因子水平,所以图例的上下顺序与各条线右端的上下顺序一致
# Shifting factor levels left or right (fct_shift); the default shifts left.
# Ordered factor holding three weekdays, with all seven days declared as levels.
x <- ordered(
  c("Mon", "Tue", "Wed"),
  levels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
)
x
data:image/s3,"s3://crabby-images/c277f/c277ff81a271724d7aecabcfae539fc5055be8dd" alt=""
原始因子水平
# Shift the levels by one position (the default amount, written explicitly).
fct_shift(x, n = 1L)
data:image/s3,"s3://crabby-images/d4943/d49439b482bb3f623733ab017624eaa99e512953" alt=""
默认(n = 1)所有水平整体向左移动一位,原来排在最前面的"Sun"绕到最后
# Shift the levels by two positions.
fct_shift(x, n = 2)
data:image/s3,"s3://crabby-images/67a2b/67a2be33b3616de9d82b90c70091bc18e6287e5d" alt=""
向左移动两位;移出左边界的水平会依次接到末尾(循环移动)
# A negative n shifts the levels in the opposite direction (to the right).
fct_shift(x, n = -1)
data:image/s3,"s3://crabby-images/78294/782943e6fac00444b2e65282077e99c8f3eb8c14" alt=""
这个有用
网友评论