# 工具:RStudio,R(3.3.4)
install.packages("devtools")
library(devtools)
devtools::install_github("sjhfx/rwda")
library(RCurl)
library(bitops)
library(rjson)
library(jiebaR)
library(jiebaRD)
library(wordcloud2)
library(vioplot)
library(sm)
library(curl)
library(plyr)
library(rwda)
# Weibo API access token; one can be generated at
# http://open.weibo.com/tools/console
access_token <- "your access_token here"

# mid of the post whose comments will be downloaded. To find it:
#   1. Click the timestamp under the user name of the post to open its
#      detail page.
#   2. Right-click the page and view its source code.
#   3. Search for "mid ="; the string of digits that follows is the value
#      needed here.
weiboid <- "/the mid of post"
#由于微博对个人开发者一次抓取评论数量有限制,大概在2000条(每页50条),
#对包里的get_comments函数进行改进,使其可以自行设置起始页数与终结页数
# Fetch the comments of one Weibo post for a caller-chosen range of pages.
#
# Requests comments/show.json once per page, from spage to epage inclusive,
# pausing one second after every request, and stacks the per-page results.
#
# Args:
#   access_token: Weibo API access token; sent as a URL query parameter.
#   weiboid:      mid of the post whose comments are requested.
#   spage:        first page to request (single number).
#   epage:        last page to request, inclusive (single number). No request
#                 is made when spage > epage.
#
# Returns:
#   A data frame holding, in page order, the rows that get_fields yields for
#   each comment; an empty data frame when no page returned any comment.
#   get_fields is not defined in this file (presumably supplied by rwda --
#   TODO confirm it is reachable from the calling environment).
get_com <- function(access_token, weiboid, spage, epage) {
  stopifnot(
    is.numeric(spage), length(spage) == 1L, !is.na(spage),
    is.numeric(epage), length(epage) == 1L, !is.na(epage)
  )

  # seq() counts downwards when spage > epage, so guard the empty range.
  page_numbers <- if (spage <= epage) seq(spage, epage) else numeric(0)

  # Keep one data frame per page and bind them once at the end, instead of
  # re-copying a growing data frame with rbind() on every iteration.
  page_frames <- vector("list", length(page_numbers))

  for (i in seq_along(page_numbers)) {
    page <- page_numbers[[i]]
    url <- paste0(
      "https://api.weibo.com/2/comments/show.json?id=", weiboid,
      "&page=", page,
      "&access_token=", access_token
    )
    # NOTE(review): certificate verification is disabled here although the
    # access token travels in the URL; enable it once a CA bundle is set up.
    response <- getURL(url, .opts = list(ssl.verifypeer = FALSE))
    comments <- fromJSON(response)$comments
    print(paste("Page", page, ": ", length(comments), "comments"))
    page_frames[[i]] <- ldply(comments, get_fields, .id = NULL)
    # Pause between requests.
    Sys.sleep(1)
  }

  if (length(page_frames) == 0L) {
    return(data.frame())
  }
  do.call(rbind, page_frames)
}
# Download comment pages 40 to 80 of the configured post.
dat <- get_com(access_token, weiboid, spage = 40, epage = 80)
# Show the current working directory (printed when run interactively).
getwd()
# The target file has a .csv extension, so write genuine comma-separated
# output; write.table() with its defaults produces a space-separated file
# that read.csv() cannot read back correctly.
write.csv(dat, file = "C:/Users/andy/Documents/weibo_comments.csv")
## Subset the rows whose text contains the keyword
head(dat)
# Replace "keywords" with the pattern to search for. The former "\keywords"
# literal contained the escape "\k", which R does not recognise and rejects
# when parsing the script.
n <- grep("keywords", dat$text)  # indices of the rows whose text matches
dat_key <- dat[n, ]
## Draw a word cloud from the downloaded comments, passing the words below
## as stopwords
weibo_cloud(
  dat,
  stopwords = c("心心", "回复", "哈哈", "/doge")
)
# 参考文章:
# 网友评论