美文网首页
Go语言基础08——HTTP编程

Go语言基础08——HTTP编程

作者: Spring618 | 来源:发表于2019-03-20 19:23 被阅读0次

    请求报文格式分析

    package main
    
    import (
        "fmt"
        "net"
    )
    
    // main listens on 127.0.0.1:8000 and, for every incoming TCP
    // connection, reads one chunk of the raw HTTP request and prints it,
    // so the request line and headers can be inspected.
    func main() {
        fmt.Println("Http请求包格式演示案例")
        listener, err := net.Listen("tcp", "127.0.0.1:8000") // listen
        if err != nil {
            fmt.Println("err:", err)
            return
        }
        defer listener.Close() // release the listener on exit

        for {
            // Block until a client connects.
            conn, err := listener.Accept()
            if err != nil {
                fmt.Println("err:", err)
                return
            }
            handleConn(conn)
        }
    }

    // handleConn reads a single buffer from the client and prints it.
    // Extracted as a helper so the deferred Close fires as soon as each
    // connection is handled: the original `defer conn.Close()` inside
    // main's infinite accept loop never executed, leaking every accepted
    // connection (the read-error `continue` path leaked the conn too).
    func handleConn(conn net.Conn) {
        defer conn.Close()

        buff := make([]byte, 1024)
        n, err1 := conn.Read(buff)
        if err1 != nil {
            fmt.Println("err1:", err1)
            return
        }
        fmt.Println("buff = ", string(buff[:n]))
    }
    
    

    浏览器访问:http://127.0.0.1:8000/
    控制台输出:

    GET / HTTP/1.1
    Host: 127.0.0.1:8000
    Connection: keep-alive
    Upgrade-Insecure-Requests: 1
    User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36
    Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
    Accept-Encoding: gzip, deflate, br
    Accept-Language: zh-CN,zh;q=0.9
    

    响应报文测试方法

    package main
    
    import (
        "fmt"
        "net/http"
    )
    
    func myHandler(w http.ResponseWriter, r *http.Request) {
        w.Write([]byte("hello go!"))
    }
    
    // main registers myHandler on /go and serves HTTP on 127.0.0.1:8000.
    func main() {
        fmt.Println("响应报文格式演示案例")
        http.HandleFunc("/go", myHandler)
        // ListenAndServe blocks and only returns on failure (e.g. the
        // port is already in use); surface that error instead of
        // discarding it as the original did.
        if err := http.ListenAndServe("127.0.0.1:8000", nil); err != nil {
            fmt.Println("err:", err)
        }
    }
    
    

    浏览器访问:http://127.0.0.1:8000/go
    页面显示:hello go!

    http客户端编程

    访问百度

    package main
    
    import (
        "fmt"
        "net/http"
    )
    
    // main performs an HTTP GET against baidu.com and prints the status,
    // headers and the response body text.
    func main() {
        fmt.Println("http编程演示案例")
        resp, err := http.Get("https://www.baidu.com")
        if err != nil {
            // The original used fmt.Printf with no format verbs (a
            // `go vet` error); Println is the correct call here.
            fmt.Println("err:", err)
            return
        }
        defer resp.Body.Close()

        fmt.Println("Status = ", resp.Status)
        fmt.Println("StatusCode = ", resp.StatusCode)
        fmt.Println("Header = ", resp.Header)

        // resp.Body is a stream; printing the struct (as the original
        // did) only shows internal pointers. Read it to obtain the HTML.
        var body []byte
        buf := make([]byte, 1024)
        for {
            n, readErr := resp.Body.Read(buf)
            if n > 0 {
                body = append(body, buf[:n]...)
            }
            if readErr != nil {
                break // io.EOF marks the normal end of the body
            }
        }
        fmt.Println("Body = ", string(body))
    }
    
    

    输出结果:

    Status =  200 OK
    StatusCode =  200
    Header =  map[Accept-Ranges:[bytes] Cache-Control:[no-cache] Connection:[Keep-Alive] Content-Length:[227] Content-Type:[text/html] Date:[Wed, 20 Mar 2019 11:10:52 GMT] Etag:["5c7cdb1f-e3"] Last-Modified:[Mon, 04 Mar 2019 08:00:31 GMT] P3p:[CP=" OTI DSP COR IVA OUR IND COM "] Pragma:[no-cache] Server:[BWS/1.1] Set-Cookie:[BD_NOT_HTTPS=1; path=/; Max-Age=300 BIDUPSID=1EB8D042488157FB56779477283469A8; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com PSTM=1553080252; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com] Strict-Transport-Security:[max-age=0] X-Ua-Compatible:[IE=Edge,chrome=1]]
    Body =  &{0xc000034080 {0 0} false <nil> 0x60dd50 0x60dcd0}
    

    百度贴吧爬虫

    package main
    
    import (
        "fmt"
        "net/http"
        "os"
        "strconv"
        //"os"
    )
    
    // main reads a page range from stdin and crawls those Baidu Tieba
    // result pages sequentially.
    func main() {
        fmt.Println("百度贴吧爬虫编程演示案例")
        // Example page URLs (pn advances by 50 per page):
        // http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=50
        // http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=100
        var (
            start int
            end   int
        )
        fmt.Printf("输入起始页:")
        fmt.Scan(&start)
        fmt.Printf("输入结束页:")
        fmt.Scan(&end)
        doWork(start, end)
    }
    
    // doWork fetches every page from start through end, inclusive, one
    // after another.
    func doWork(start, end int) {
        fmt.Printf("正在爬去 %d - %d 页的数据\n", start, end)
        for page := start; page <= end; page++ {
            getPage(page)
        }
    }
    
    // getPage downloads one Tieba result page and saves the raw HTML to
    // "<page>.html" in the current directory.
    func getPage(page int) {
        // Each result page shows 50 posts, so the pn query parameter is
        // page*50.
        url := "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
        fmt.Println("url = ", url)

        res, err := httpGet(url)
        if err != nil {
            fmt.Println("err:", err)
            return
        }

        // os.WriteFile creates (or truncates) the file and closes it for
        // us; the original os.Create/WriteString version never closed
        // the handle and ignored the write error.
        fileName := strconv.Itoa(page) + ".html"
        if err := os.WriteFile(fileName, []byte(res), 0644); err != nil {
            fmt.Println("err1:", err)
            return
        }
    }
    func httpGet(url string) (res string, err error) {
        resp, err1 := http.Get(url) //func Get(url string) (resp *Response, err error)
        if err1 != nil {
            fmt.Println("err1:", err1)
            err = err1
            return
        }
        defer resp.Body.Close()
    
        buff := make([]byte, 1024)
        for {
            n, _ := resp.Body.Read(buff) //(n int, err error)
            // if err2 != nil {
            //  fmt.Println("err2:", err2)
            //  err = err2
            //  return
            // }// 这里竟然不能这么写???
            if n == 0 {
                fmt.Println("读取结束:")
                break
            }
            res += string(buff[:n])
    
        }
    
        return
    
    }
    
    

    并发版网络爬虫:

    package main
    
    import (
        "fmt"
        "net/http"
        "os"
        "strconv"
        //"os"
    )
    
    var mPage = make(chan int)
    
    // main reads a page range from stdin and crawls those Tieba pages
    // concurrently.
    func main() {
        fmt.Println("百度贴吧爬虫编程演示案例")
        // Example page URLs (pn advances by 50 per page):
        // http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=50
        // http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=100
        start := promptPage("输入起始页:")
        end := promptPage("输入结束页:")
        doWork(start, end)
    }

    // promptPage prints label and reads one integer from stdin.
    func promptPage(label string) int {
        var page int
        fmt.Print(label)
        fmt.Scan(&page)
        return page
    }
    
    // doWork launches one goroutine per page, then blocks until every
    // page has reported completion over the mPage channel.
    func doWork(start, end int) {
        fmt.Printf("正在爬去 %d - %d 页的数据\n", start, end)

        for page := start; page <= end; page++ {
            go getPage(page)
        }

        // Collect exactly one completion signal per launched page.
        total := end - start + 1
        for done := 0; done < total; done++ {
            fmt.Printf("第 %d页 爬去完成\n", <-mPage)
        }
    }
    
    func getPage(page int) {
        var url string
        url = "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
        //fmt.Println("url = ", url)
        fmt.Printf("开始爬去第 %d 页,%s\n", page, url)
        res, err := httpGet(url)
        if err != nil {
            fmt.Println("err:", err)
            return
        }
        mPage <- page
        // fmt.Println("res:", res)
        // 把内容写在文件里 1.html
        fileName := strconv.Itoa(page) + ".html"
        f1, err1 := os.Create(fileName)
        if err1 != nil {
            fmt.Println("err1:", err1)
            return
        }
        f1.WriteString(res)
    
    }
    func httpGet(url string) (res string, err error) {
        resp, err1 := http.Get(url) //func Get(url string) (resp *Response, err error)
        if err1 != nil {
            fmt.Println("err1:", err1)
            err = err1
            return
        }
        defer resp.Body.Close()
    
        buff := make([]byte, 1024)
        for {
            n, _ := resp.Body.Read(buff) //(n int, err error)
            // if err2 != nil {
            //  fmt.Println("err2:", err2)
            //  err = err2
            //  return
            // }// 这里竟然不能这么写???
            if n == 0 {
                //fmt.Println("读取结束")
                break
            }
            res += string(buff[:n])
    
        }
    
        return
    
    }
    
    

    段子爬虫

    package main
    
    import (
        "fmt"
        "net/http"
        "os"
        "regexp"
        "strconv"
        //"os"
    )
    
    var mPage = make(chan int)
    
    // main crawls a fixed page range (page 1 only) of the joke site.
    func main() {
        fmt.Println("段子爬虫编程演示案例")
        // Interactive range input, kept for reference:
        // fmt.Printf("输入起始页:"); fmt.Scan(&start)
        // fmt.Printf("输入结束页:"); fmt.Scan(&end)
        start, end := 1, 1
        doWork(start, end)
    }
    
    // doWork launches a goroutine per page, then waits for each page to
    // report completion on the mPage channel.
    func doWork(start, end int) {
        fmt.Printf("正在爬去 %d - %d 页的数据\n", start, end)

        total := end - start + 1
        for p := start; p <= end; p++ {
            go getPage(p)
        }
        for done := 0; done < total; done++ {
            fmt.Printf("第 %d页 爬去完成\n", <-mPage)
        }
    }
    
    func getPage(page int) {
        var url string
        url = "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
        url = "https://www.pengfue.com/index_" + strconv.Itoa(page) + ".html"
        //fmt.Println("url = ", url)
        fmt.Printf("开始爬去第 %d 页,%s\n", page, url)
        res, err := httpGet(url)
        if err != nil {
            fmt.Println("err:", err)
            return
        }
    
        //<a href="https://www.pengfue.com/content_1857662_1.html" target="_blank">游泳圈</a>
        reg := regexp.MustCompile(`<h1 class="dp-b"><a href="(?s:(.*?))"`)
        if reg == nil {
            fmt.Println("regexp error.")
            return
        }
        urls := reg.FindAllStringSubmatch(res, -1)
    
        fileTitle := make([]string, 0)
        fileContent := make([]string, 0)
    
        // fmt.Println("urls:", urls)
        for k, v := range urls {
            fmt.Println("k,v:", k, v[1])
            res, err := httpGet(v[1])
    
            if err != nil {
                fmt.Println("err:", err)
                return
            }
    
            // 处理title
            reg := regexp.MustCompile(`<h1>(?s:(.*?))</h1>`)
            if reg == nil {
                fmt.Println("regexp error.")
                return
            }
            titles := reg.FindAllStringSubmatch(res, 1)
            for _, title := range titles {
                fmt.Println("title:", title[1]) //this is title.
                // 把内容存储到文件中
                fileTitle = append(fileTitle, title[1])
            }
    
            // 处理content
            regContent := regexp.MustCompile(`<div class="content-txt pt10">(?s:(.*?))<a id="prev"`)
            if regContent == nil {
                fmt.Println("regexp error.")
                return
            }
            contents := regContent.FindAllStringSubmatch(res, 1)
            for _, content := range contents {
                fmt.Println("content:", content[1]) //this is content.
                // 把内容存储到文件中
                fileContent = append(fileContent, content[1])
            }
    
        }
    
        //fmt.Println("fileTitle = ", fileTitle)
        //fmt.Println("fileContent = ", fileContent)
    
        saveToFile(page, fileTitle, fileContent)
    
        // fmt.Println("res:", res)
    
        mPage <- page
    
    }
    
    func saveToFile(page int, fileTitle, fileContent []string) {
        // 把内容写在文件里 1.html
        fileName := strconv.Itoa(page) + ".txt"
        f1, err1 := os.Create(fileName)
        if err1 != nil {
            fmt.Println("err1:", err1)
            return
        }
    
        defer f1.Close()
    
        n := len(fileTitle)
        for i := 0; i < n; i++ {
            f1.WriteString(fileTitle[i] + "\n")
            f1.WriteString(fileContent[i] + "\n")
            f1.WriteString("===================================================================\n")
        }
    
    }
    
    func httpGet(url string) (res string, err error) {
        resp, err1 := http.Get(url) //func Get(url string) (resp *Response, err error)
        if err1 != nil {
            fmt.Println("err1:", err1)
            err = err1
            return
        }
        defer resp.Body.Close()
    
        buff := make([]byte, 1024)
        for {
            n, _ := resp.Body.Read(buff) //(n int, err error)
            // if err2 != nil {
            //  fmt.Println("err2:", err2)
            //  err = err2
            //  return
            // }// 这里竟然不能这么写???
            if n == 0 {
                //fmt.Println("读取结束")
                break
            }
            res += string(buff[:n])
    
        }
    
        return
    
    }
    
    

    还需要处理title和content中包含的特殊格式。

    并发的爬虫:

    package main
    
    import (
        "fmt"
        "net/http"
        "os"
        "regexp"
        "strconv"
        "strings"
        //"os"
    )
    
    var mPage = make(chan int)
    
    // main reads a page range from stdin and crawls those joke-site
    // pages concurrently.
    func main() {
        fmt.Println("---段子爬虫编程演示案例---")

        var first, last int
        fmt.Printf("输入起始页:")
        fmt.Scan(&first)
        fmt.Printf("输入结束页:")
        fmt.Scan(&last)
        // Fixed single-page run, kept for reference:
        // start = 1; end = 1

        doWork(first, last)
    }
    
    // doWork starts one crawler goroutine per page and then waits until
    // every page has announced completion on the mPage channel.
    func doWork(start, end int) {
        fmt.Printf("正在爬去 %d - %d 页的数据...\n", start, end)

        for p := start; p <= end; p++ {
            go getPage(p)
        }

        remaining := end - start + 1
        for ; remaining > 0; remaining-- {
            fmt.Printf("第【%d】页 爬取完成\n", <-mPage)
        }
    }
    
    func getPage(page int) {
        var url string
        url = "http://tieba.baidu.com/f?kw=go%E8%AF%AD%E8%A8%80&ie=utf-8&pn=" + strconv.Itoa(page*50)
        url = "https://www.pengfue.com/index_" + strconv.Itoa(page) + ".html"
        //fmt.Println("url = ", url)
        fmt.Printf("开始爬去第【%d】页:%s\n", page, url)
        res, err := httpGet(url)
        if err != nil {
            fmt.Println("err:", err)
            return
        }
    
        //<a href="https://www.pengfue.com/content_1857662_1.html" target="_blank">游泳圈</a>
        reg := regexp.MustCompile(`<h1 class="dp-b"><a href="(?s:(.*?))"`)
        if reg == nil {
            fmt.Println("regexp error.")
            return
        }
        urls := reg.FindAllStringSubmatch(res, -1)
    
        fileTitle := make([]string, 0)
        fileContent := make([]string, 0)
        fileUrl := make([]string, 0)
        // fmt.Println("urls:", urls)
        for k, v := range urls {
            fmt.Println("url:", k, v[1])
            res, err := httpGet(v[1])
    
            if err != nil {
                fmt.Println("err:", err)
                return
            }
    
            // 处理title
            reg := regexp.MustCompile(`<h1>(?s:(.*?))</h1>`)
            if reg == nil {
                fmt.Println("regexp error.")
                return
            }
            titles := reg.FindAllStringSubmatch(res, 1)
            var tempTitle string   //用于处理title
            var tempContent string //用于处理content
            for _, title := range titles {
    
                // 把内容存储到文件中
                tempTitle = title[1]
                tempTitle = strings.Replace(tempTitle, "\r", "", -1)
                tempTitle = strings.Replace(tempTitle, "\n", "", -1)
                tempTitle = strings.Replace(tempTitle, " ", "", -1)
                tempTitle = strings.Replace(tempTitle, "\t", "", -1)
                fmt.Println("title:", tempTitle) //this is title.
                fileTitle = append(fileTitle, tempTitle)
            }
    
            // 处理content
            regContent := regexp.MustCompile(`<div class="content-txt pt10">(?s:(.*?))<a id="prev"`)
            if regContent == nil {
                fmt.Println("regexp error.")
                return
            }
            contents := regContent.FindAllStringSubmatch(res, 1)
            for _, content := range contents {
    
                // 把内容存储到文件中
    
                tempContent = content[1]
                // tempContent = strings.Replace(tempContent, "\r", "", -1)
                tempContent = strings.Replace(tempContent, "\n", "", -1)
                // tempContent = strings.Replace(tempContent, " ", "", -1)
                tempContent = strings.Replace(tempContent, "\t", "", -1)
                fmt.Println("content:", tempContent) //this is content.
                fileContent = append(fileContent, tempContent)
            }
            // 处理url
            fileUrl = append(fileUrl, v[1])
    
        }
    
        //fmt.Println("fileTitle = ", fileTitle)
        //fmt.Println("fileContent = ", fileContent)
    
        //saveToFile(page, fileTitle, fileContent)
        saveToFileWithUrl(page, fileTitle, fileContent, fileUrl)
        // fmt.Println("res:", res)
    
        mPage <- page
    
    }
    
    func saveToFile(page int, fileTitle, fileContent []string) {
        // 把内容写在文件里 1.html
        fileName := strconv.Itoa(page) + ".txt"
        f1, err1 := os.Create(fileName)
        if err1 != nil {
            fmt.Println("err1:", err1)
            return
        }
    
        defer f1.Close()
    
        n := len(fileTitle)
        for i := 0; i < n; i++ {
            f1.WriteString(fileTitle[i] + "\n")
            f1.WriteString(fileContent[i] + "\n")
            f1.WriteString("===================================================================\n")
        }
    
    }
    func saveToFileWithUrl(page int, fileTitle, fileContent, fileUrl []string) {
        // 把内容写在文件里 1.html
        fileName := strconv.Itoa(page) + ".txt"
        f1, err1 := os.Create(fileName)
        if err1 != nil {
            fmt.Println("err1:", err1)
            return
        }
    
        defer f1.Close()
    
        n := len(fileTitle)
        for i := 0; i < n; i++ {
            f1.WriteString(fileTitle[i] + "\n")
            f1.WriteString(fileContent[i] + "\n")
            f1.WriteString(fileUrl[i] + "\n")
            f1.WriteString("===================================================================\n")
        }
    
    }
    
    func httpGet(url string) (res string, err error) {
        resp, err1 := http.Get(url) //func Get(url string) (resp *Response, err error)
        if err1 != nil {
            fmt.Println("err1:", err1)
            err = err1
            return
        }
        defer resp.Body.Close()
    
        buff := make([]byte, 1024)
        for {
            n, _ := resp.Body.Read(buff) //(n int, err error)
            // if err2 != nil {
            //  fmt.Println("err2:", err2)
            //  err = err2
            //  return
            // }// 这里竟然不能这么写???
            if n == 0 {
                //fmt.Println("读取结束")
                break
            }
            res += string(buff[:n])
    
        }
    
        return
    
    }
    
    

    END.

    相关文章

      网友评论

          本文标题:Go语言基础08——HTTP编程

          本文链接:https://www.haomeiwen.com/subject/itgtvqtx.html