// DATE: 2017-12-29
// Sequential version: Total time: 12.393550402 (seconds).
// Goroutine version: only about 0.5 to 1.134134881 seconds.
//
package main
import (
	"bytes"
	"fmt"
	"io/ioutil"
	"log"
	"strconv"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)
const (
// host is the site root; getPage prefixes it to the href values it finds on a
// listing page to build absolute URLs.
host = "http://www.ygdy8.net"
// url is the common prefix of listing-page addresses; getPage appends the page
// number and ".html" to it.
url = "http://www.ygdy8.net/html/gndy/dyzz/list_23_"
)
// pages accumulates the detail-page URLs appended by getPage; main ranges over
// it to start one getBTLink goroutine per entry.
var pages []string
// main scrapes listing page 1, fetches every collected detail page in its own
// goroutine, prints each download link as it arrives, and finally prints the
// total elapsed time in seconds.
func main() {
	start := time.Now()
	ch := make(chan string)
	getPage(1)

	// A detail page may produce zero, one, or many links, so receiving exactly
	// len(pages) values can deadlock (too few sends) or drop links and leave
	// senders blocked (too many sends). Track the goroutines instead and close
	// the channel once every one of them has returned.
	var wg sync.WaitGroup
	for _, page := range pages {
		wg.Add(1)
		// Pass page as an argument so each goroutine gets its own copy.
		go func(page string) {
			defer wg.Done()
			getBTLink(page, ch)
		}(page)
	}
	go func() {
		wg.Wait()
		close(ch)
	}()

	// The loop ends when the channel is closed, i.e. after all fetchers finish.
	for link := range ch {
		fmt.Printf("Link=%q\n", link)
	}
	fmt.Printf("Total time: %v\n", time.Since(start).Seconds())
}
// getPage downloads listing page number index, prints the title and absolute
// URL of every ".co_content8 .ulink" element that carries an href, and appends
// that URL to the package-level pages slice.
//
// A failed download terminates the program through log.Fatal.
func getPage(index int) {
	doc, err := goquery.NewDocument(url + strconv.Itoa(index) + ".html")
	if err != nil {
		log.Fatal(err) // TODO: schedule a retry instead of exiting
	}
	doc.Find(".co_content8 .ulink").Each(func(i int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok {
			// No href means no detail page; queueing the bare host would only
			// make a worker fetch the site's front page.
			return
		}
		href = host + href

		// The title is only printed, so a decoding failure is reported but
		// does not prevent the link from being queued.
		title, err := GbkToUtf8([]byte(s.Text()))
		if err != nil {
			log.Printf("decoding title for %q: %v", href, err)
		}
		fmt.Printf("Title=%q\nLink=%q\n\n", title, href)
		pages = append(pages, href)
	})
}
// getBTLink downloads the detail page at url and sends the href of every
// "#Zoom td a" anchor, converted from GBK to UTF-8, on ch. It sends once per
// usable anchor, so a single call may send zero, one, or many values; all
// sends complete before it returns.
//
// A failed download terminates the whole program through log.Fatal, even when
// getBTLink runs in a goroutine.
func getBTLink(url string, ch chan<- string) {
	doc, err := goquery.NewDocument(url)
	if err != nil {
		log.Fatal(err) // TODO: schedule a retry instead of exiting
	}
	doc.Find("#Zoom td a").Each(func(i int, selection *goquery.Selection) {
		href, ok := selection.Attr("href")
		if !ok {
			// An anchor without an href carries no download link.
			return
		}
		link, err := GbkToUtf8([]byte(href))
		if err != nil {
			// Report and skip rather than sending an empty string downstream.
			log.Printf("decoding link %q on %s: %v", href, url, err)
			return
		}
		ch <- string(link)
	})
}
// 页面是 gbk 编码
func GbkToUtf8(s []byte) ([]byte, error) {
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(reader)
if e != nil {
return nil, e
}
return d, nil
}
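// (Residue from the web page this file was copied from: the "reader comments"
// section heading. It is not part of the program.)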