# Data download link (Baidu Pan): https://pan.baidu.com/s/1G5zZqkYB1TKszhvCT9F7zA  extraction code: 6evn
# ---- Package setup ----
# Install each CRAN dependency only when it is missing. Calling
# install.packages() unconditionally re-downloads every package on each run,
# requires network access, and can fail when the package is already loaded.
local({
  cran_pkgs <- c("wordcloud", "wordcloud2", "rlang", "jiebaR", "jiebaRD")
  is_installed <- vapply(
    cran_pkgs, requireNamespace, logical(1), quietly = TRUE
  )
  missing_pkgs <- cran_pkgs[!is_installed]
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
})

library(wordcloud)
library(wordcloud2)
# Usage reminder for the classic wordcloud() function:
# wordcloud(words = <word vector>, freq = <frequency vector>, min.freq = n,
#           max.words = m, random.order = TRUE/FALSE)

# ---- Word segmentation ----
library(jiebaRD)
library(jiebaR)

# cnSentimentR is installed from GitHub, which needs devtools.
# NOTE(review): assumes the installed package is named "cnSentimentR" (same as
# the repository) -- confirm. Nothing in the visible part of this script uses
# it.
if (!requireNamespace("cnSentimentR", quietly = TRUE)) {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("leeshuheng/cnSentimentR")
}
# ---- Word-frequency analysis and word cloud ----

# Alternative input path (disabled): segment a whole text file with a default
# worker, then read the generated segment file back in.
# engine = worker()
# segment("人道至尊.txt",engine)
# word=scan('人道至尊.segment.2018-12-06_18_38_22.txt',sep='n',what='',encoding="UTF-8")

# Read the CSV without a header row; strings are marked as UTF-8.
word <- read.csv("D:/2.csv", header = FALSE, encoding = "UTF-8")
str(word)

# The text to analyse is taken from the 4th column; fail early with a clear
# message when the file has fewer columns.
if (ncol(word) < 4) {
  stop("Expected at least 4 columns in D:/2.csv, found ", ncol(word),
       call. = FALSE)
}
word[, 4]

# Join all entries into a single string. A space is used as the separator so
# that the end of one entry and the start of the next cannot fuse into a token
# that never occurred in the data (with collapse = "", "good" followed by
# "phone" became "goodphone"). Any whitespace-only token this could produce is
# a single character and is removed by the nchar() filter below.
wd <- paste(word[, 4], collapse = " ")

# Create the segmentation engine of type "mix".
mixseg <- worker("mix")

# Segment the text and count word frequencies.
kd <- segment(wd, jiebar = mixseg)
kd <- kd[nchar(kd) > 1] # drop single-character tokens, which carry little meaning
# NOTE: `sd` shares its name with stats::sd(); the name is kept so that any
# later code relying on it keeps working.
sd <- data.frame(table(kd)) # frequency of each distinct word (column Freq)
# sd <- sd[sd$Freq>5,] # optional: keep only words occurring more than 5 times
# sd <- sd[-grep(pattern="华为|手机",sd$kd),] # optional: drop neutral filler words
sd <- sd[order(sd$Freq, decreasing = TRUE), ] # most frequent words first
sd

# Plot the word cloud; wordcloud2() offers many parameters for styling.
wordcloud2(sd)
# Tags: word frequency (词频), word cloud (词云图), user comments (网友评论)