package main
import (
"crypto/md5"
"database/sql"
"encoding/hex"
"fmt"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
_ "github.com/go-sql-driver/mysql"
)
// main is the program entry point: it performs one Chaindd scrape run and
// returns when that run finishes.
func main() {
	Chaindd()
}
// Chaindd scrapes the article list on the chaindd.com front page and stores
// one row per article in the MySQL table newsc.
//
// For every matching list item except the last <li>, it reads the thumbnail
// URL, title, summary and article link from the list page, then fetches the
// article page to obtain the publication time and body text. Each article is
// written with a REPLACE statement and the resulting LastInsertId is printed.
//
// Failure handling: a failure to load the front page, or any database error,
// panics through checkErr. A problem limited to one article (no link, article
// page cannot be fetched, publication time cannot be parsed) is reported on
// stdout and that article is skipped.
//
// An earlier revision also PUT each article as a JSON document to an
// Elasticsearch index (…:9200/bytenews/document/<onlyid>); that path was
// already disabled and only MySQL is written here.
func Chaindd() {
	const (
		baseURL = "http://www.chaindd.com"
		// NOTE(review): credentials are hard-coded in the DSN; consider
		// loading them from configuration or the environment.
		dsn = "root:123456@/news?charset=utf8"
		// source is stored in the addr column for every row; the author name
		// shown on the page is not used.
		source = "链得得"
		// timeLayout is the reference layout for the page time once ":00"
		// (seconds) has been appended to it.
		timeLayout = "2006-01-02 15:04:05"
	)

	doc, err := goquery.NewDocument(baseURL + "/")
	checkErr(err)

	// Open the pool and prepare the statement once for the whole run instead
	// of once per article, and release both when the run ends.
	db, err := sql.Open("mysql", dsn)
	checkErr(err)
	defer db.Close()

	stmt, err := db.Prepare("REPLACE newsc SET onlyid=?,title=?,imgUrl=?,content=?,main=?,addr=?,href=?,time=?")
	checkErr(err)
	defer stmt.Close()

	// Single-pass replacers equivalent to the chained replacements they stand for.
	titleCleaner := strings.NewReplacer(",", "", " ", "")                  // drop commas and spaces
	summaryCleaner := strings.NewReplacer("\t", " ", "\n", " ", `"`, "'")  // flatten whitespace, soften quotes
	contentCleaner := strings.NewReplacer("\t", " ", "\n", "/n", `"`, "'") // newlines become the literal "/n"

	doc.Find(".article-list li").Not("li:last-child").Each(func(i int, s *goquery.Selection) {
		// A missing thumbnail is stored as the empty string.
		imgURL, _ := s.Find(".pic img").Attr("src")
		title := s.Find(".cont h3 .title").Text()
		summary := summaryCleaner.Replace(s.Find(".cont .summary").Text())

		href, ok := s.Find(".cont h3 a").Attr("href")
		if !ok {
			// Without a link the request below would just re-fetch the site root.
			fmt.Printf("chaindd: skipping item %d: no article link\n", i)
			return
		}
		articleURL := baseURL + href

		article, err := goquery.NewDocument(articleURL)
		if err != nil {
			fmt.Printf("chaindd: skipping %s: %v\n", articleURL, err)
			return
		}

		// The page shows the time to the minute; append seconds so it matches
		// timeLayout, and interpret it in the machine's local zone.
		rawTime := strings.TrimSpace(article.Find("article .authors .time").Text()) + ":00"
		published, err := time.ParseInLocation(timeLayout, rawTime, time.Local)
		if err != nil {
			fmt.Printf("chaindd: skipping %s: bad publish time %q: %v\n", articleURL, rawTime, err)
			return
		}

		content := contentCleaner.Replace(article.Find(".inner").Text())

		// Arguments follow the column order of the statement. The body text
		// goes to `content` and the summary to `main`; these two were
		// previously passed in the opposite order.
		res, err := stmt.Exec(
			chainddOnlyID(title, time.Now()),
			titleCleaner.Replace(title),
			imgURL,
			content,
			summary,
			source,
			articleURL,
			strconv.FormatInt(published.Unix(), 10),
		)
		checkErr(err)

		id, err := res.LastInsertId()
		checkErr(err)
		fmt.Println(id)
	})
}

// chainddOnlyID builds the onlyid value for a row: the first 10 hex characters
// of the MD5 of the raw (uncleaned) title, followed by the Unix-seconds value
// of now with its first six digits dropped. Because of the time suffix, the
// same title scraped at a different second yields a different id.
func chainddOnlyID(title string, now time.Time) string {
	sum := md5.Sum([]byte(title))
	stamp := strconv.FormatInt(now.Unix(), 10)
	if len(stamp) > 6 {
		stamp = stamp[6:]
	}
	return hex.EncodeToString(sum[:])[:10] + stamp
}
// checkErr panics with err when err is non-nil and does nothing otherwise.
func checkErr(err error) {
	if err == nil {
		return
	}
	panic(err)
}
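// "Reader comments" — stray non-code text that was left at the end of this file; it is not part of the program.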