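// Setup: run the following shell commands before building this program.
//
// Download the gopm package tool:
//   go get -v github.com/gpmgo/gopm
// Install goimports (automatic import management):
//   gopm get -g -v golang.org/x/tools/cmd/goimports
// Install the text transcoding package:
//   gopm get -g -v golang.org/x/text
// Install the package used to auto-detect a web page's encoding:
//   gopm get -g -v golang.org/x/net/html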
package main
import (
"net/http"
"io/ioutil"
"fmt"
"golang.org/x/text/transform"
"io"
"golang.org/x/text/encoding"
"golang.org/x/net/html/charset"
"bufio"
"regexp"
)
// main fetches the zhenai.com city index page, converts the response body to
// UTF-8 using the encoding detected from its leading bytes, and prints every
// city link found in it.
//
// It panics if the request or the read fails, and prints the status code and
// returns if the server answers with anything other than 200 OK.
func main() {
	resp, err := http.Get("http://www.zhenai.com/zhenghun")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Println("Error:status code", resp.StatusCode)
		return
	}

	// Use a single buffered reader for both detection and decoding.
	// determinEncoding peeks through bufio.NewReader(r); handing it the raw
	// body would leave the peeked (and any further buffered) bytes in a
	// throwaway buffer, so the decoder below would start mid-document.
	// bufio.NewReader returns an existing *bufio.Reader unchanged, so peeking
	// on this reader consumes nothing.
	body := bufio.NewReader(resp.Body)
	e := determinEncoding(body)
	utf8Reader := transform.NewReader(body, e.NewDecoder())

	all, err := ioutil.ReadAll(utf8Reader)
	if err != nil {
		panic(err)
	}
	printCityList(all)
}
// cityListRe matches an anchor whose href is a city page of the form
// http://www.zhenai.com/zhenghun/<slug>, where the slug consists of digits
// and lowercase letters. Submatch 1 is the URL, submatch 2 is the link text.
// It is compiled once at package scope instead of on every call.
var cityListRe = regexp.MustCompile(`<a href="(http://www.zhenai.com/zhenghun/[0-9a-z]+)"[^>]*>([^<]+)</a>`)

// printCityList scans contents for city links and writes one
// "City: <name>, URL: <url>" line per match to standard output, followed by
// a line with the total number of matches.
func printCityList(contents []byte) {
	matches := cityListRe.FindAllSubmatch(contents, -1)
	for _, m := range matches {
		// m[0] is the whole anchor; m[1] is the URL, m[2] the link text.
		fmt.Printf("City: %s, URL: %s\n", m[2], m[1])
	}
	fmt.Printf("Matches found:%d\n", len(matches))
}
// determinEncoding guesses the character encoding of the content in r by
// inspecting up to its first 1024 bytes with charset.DetermineEncoding.
//
// The bytes are obtained by peeking on bufio.NewReader(r). If r is already a
// *bufio.Reader with a large enough buffer, bufio.NewReader returns r itself
// and the peeked bytes remain readable from r afterwards. For any other
// reader, whatever gets buffered here is consumed from r, so callers that
// still need the full content should pass a *bufio.Reader.
//
// Content shorter than 1024 bytes is not an error: detection runs on the
// bytes that were available. Any other read error causes a panic.
func determinEncoding(r io.Reader) encoding.Encoding {
	head, err := bufio.NewReader(r).Peek(1024)
	// Peek returns io.EOF together with the bytes it did read when the source
	// holds fewer than 1024 bytes; that is a short document, not a failure.
	if err != nil && err != io.EOF {
		panic(err)
	}
	// The other two results are the encoding's name and a certainty flag,
	// not errors; only the encoding itself is needed here.
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}
// 网友评论 (reader comments: page footer from the original post, not part of the program)