刷访问量

作者: arbboter | 来源:发表于2015-02-13 11:57 被阅读233次

    主程序

    #include "my_curl.h"
    #include "check.h"
    #include <string.h>
    #include <set>
    #include <vector>
    #include <string>
    #include <iostream>
    #include <fstream>
    #include <time.h>
    using namespace std;
    
    void GetCsdnBlogList(const string& uid, set<string>& setUrl);
    
    /**
     * Program entry point.
     *
     * With a command-line argument, the argument is treated as a CSDN blogger
     * uid and the article URLs are collected through GetCsdnBlogList().
     * Without one, usage text is printed and the URLs are read line by line
     * from the local file "site.txt".
     *
     * Twenty GET requests are then issued, each against a randomly chosen URL
     * from the collected list.
     *
     * @return 0 after the requests were issued, -1 when no URL was collected.
     */
    int main(int argc, char* argv[])
    {
        std::set<std::string> setUrl;
        if(argc > 1)
        {
            GetCsdnBlogList(argv[1], setUrl);
        }
        else
        {
            printf("两种工作方式:\n1. %s csdn_bloger_uid 访问CSDN博客列表\n", argv[0]);
            printf("2. 默认访问本地网站列表文件site.txt指定的地址\n");

            // If the file cannot be opened, getline fails immediately and the set stays empty.
            std::ifstream ifile("site.txt");
            std::string strLine;
            while(std::getline(ifile, strLine))
            {
                // Drop a trailing CR so files with CRLF line endings yield clean URLs.
                if(!strLine.empty() && strLine[strLine.size() - 1] == '\r')
                {
                    strLine.erase(strLine.size() - 1);
                }
                // A blank line is not a URL; inserting it would waste requests.
                if(!strLine.empty())
                {
                    setUrl.insert(strLine);
                }
            }
        }
        if(setUrl.empty())
        {
            return -1;
        }

        // The set removed duplicates; a vector gives the random access needed below.
        const std::vector<std::string> vecUrl(setUrl.begin(), setUrl.end());
        setUrl.clear();

        CHttpClient http;
        std::string strResult;

        srand((unsigned int)time(NULL));
        for(int i = 1; i <= 20; ++i)
        {
            const std::string& strUrl = vecUrl[rand() % vecUrl.size()];

            // The write callback appends to the response string, so reset it
            // before every request to keep memory use bounded.
            strResult.clear();
            http.Get(strUrl, strResult);
            std::cout << i << "\t" << strUrl << std::endl;
        }

        return 0;
    }
    
    /**
     * Extracts article links from one CSDN article-list page.
     *
     * For every occurrence of "link_title" in the page, the text between the
     * following "article/details/" and the next "\">" is taken as the article
     * id. "http://blog.csdn.net/<uid>/article/details/<id>" is inserted into
     * setUrl and the id is echoed to stdout prefixed with '\r'.
     *
     * @param html   Page markup to scan.
     * @param uid    Blogger uid used to build the absolute article URL.
     * @param setUrl Receives the article URLs; existing entries are kept.
     * @return true when this call added at least one new URL to setUrl.
     */
    bool GetCsdnBlogArticle(const std::string& html, const std::string& uid, std::set<std::string>& setUrl)
    {
        const std::string strKeyWord    = "link_title";
        const std::string strKeyArticle = "article/details/";
        const std::string strKeyEnd     = "\">";
        const std::string strArticle    = "http://blog.csdn.net/" + uid + "/article/details/";
        // Upper bound on the id length, so a missing terminator cannot pull a
        // large piece of the page into the URL.
        const std::string::size_type nMaxIdLen = 63;
        const std::string::size_type nCount = setUrl.size();

        std::string::size_type nPos = 0;
        while((nPos = html.find(strKeyWord, nPos)) != std::string::npos)
        {
            std::string::size_type nBeg = html.find(strKeyArticle, nPos);
            if(nBeg == std::string::npos)
            {
                // Title marker without an article link: nothing more to extract.
                break;
            }
            nBeg += strKeyArticle.length();

            const std::string::size_type nEnd = html.find(strKeyEnd, nBeg);
            if(nEnd == std::string::npos)
            {
                // Truncated markup: the id has no terminator.
                break;
            }

            std::string::size_type nLen = nEnd - nBeg;
            if(nLen > nMaxIdLen)
            {
                nLen = nMaxIdLen;
            }
            const std::string strId = html.substr(nBeg, nLen);

            setUrl.insert(strArticle + strId);
            std::cout << "\r" << strId;

            // Continue scanning after the link that was just consumed.
            nPos = nBeg;
        }

        return setUrl.size() != nCount;
    }
    
    /**
     * Collects the article URLs of a CSDN blogger.
     *
     * Requests "http://blog.csdn.net/<uid>/article/list/<n>" for n = 1, 2, ...
     * and feeds each page to GetCsdnBlogArticle(). Paging stops as soon as a
     * page contributes no new URL (this includes a page that could not be
     * fetched, since its body is then empty).
     *
     * @param uid    Blogger uid.
     * @param setUrl Receives the article URLs.
     */
    void GetCsdnBlogList(const std::string& uid, std::set<std::string>& setUrl)
    {
        const std::string strUsr = "http://blog.csdn.net/" + uid + "/article/list/";
        CHttpClient http;
        std::string strHtml;
        char        szPage[16];     // holds only the decimal page number
        int         nPage = 0;

        std::cout << "抓到文章 -> \n";
        do
        {
            // Format only the page number; the URL itself is built as a
            // std::string so a long uid cannot overflow a fixed buffer.
            sprintf_s(szPage, "%d", ++nPage);

            // The write callback appends to the response string; clear it so
            // every page is parsed once instead of re-parsing all earlier pages.
            strHtml.clear();
            http.Get(strUsr + szPage, strHtml);
        }while(GetCsdnBlogArticle(strHtml, uid, setUrl));

        std::cout << "\n共抓取到文章" << setUrl.size() << "篇" << std::endl;
    }
    

    封装的curl类

    #include "my_curl.h"
    #include "curl/curl.h"
    #include <string>
    
    
    #pragma comment(lib, "ws2_32.lib")
    #pragma comment(lib, "wldap32.lib")
    
    #if _DEBUG
    #pragma comment(lib, "libcurld.lib")
    #else
    #pragma comment(lib, "libcurl.lib")
    #endif
    
    /** Creates a client with debug tracing switched off. */
    CHttpClient::CHttpClient()
        : m_bDebug(false)
    {
    }
    
    /** Destructor; it has an empty body and performs no cleanup. */
    CHttpClient::~CHttpClient()
    {
    }
    
    /**
     * libcurl debug callback (installed through CURLOPT_DEBUGFUNCTION).
     *
     * Prints header and body traffic to stdout, tagged with its direction.
     * Informational text (CURLINFO_TEXT) and any other info type are not
     * printed.
     *
     * @param itype Kind of data being reported.
     * @param pData Start of the data; it is NOT NUL-terminated.
     * @param size  Number of valid bytes at pData.
     * @return Always 0.
     */
    static int OnDebug(CURL *, curl_infotype itype, char * pData, size_t size, void *)
    {
        const char* pszTag = NULL;
        switch(itype)
        {
        case CURLINFO_HEADER_IN:
            pszTag = "[HEADER_IN]";
            break;
        case CURLINFO_HEADER_OUT:
            pszTag = "[HEADER_OUT]";
            break;
        case CURLINFO_DATA_IN:
            pszTag = "[DATA_IN]";
            break;
        case CURLINFO_DATA_OUT:
            pszTag = "[DATA_OUT]";
            break;
        default:
            // CURLINFO_TEXT and the remaining info types are intentionally silent.
            break;
        }

        if(pszTag != NULL && pData != NULL)
        {
            // The buffer has no terminator, so bound the output with an
            // explicit precision instead of relying on "%s" finding a NUL.
            printf("%s%.*s\n", pszTag, static_cast<int>(size), pData);
        }
        return 0;
    }
    
    /**
     * libcurl write callback (installed through CURLOPT_WRITEFUNCTION).
     *
     * Appends the received chunk to the std::string passed as user data.
     *
     * @param buffer Received bytes (not NUL-terminated).
     * @param size   Size of one element.
     * @param nmemb  Number of elements; the chunk holds size * nmemb bytes.
     * @param lpVoid Pointer to the destination std::string.
     * @return The number of bytes consumed (size * nmemb). When either pointer
     *         is NULL, (size_t)-1 is returned; a value different from the
     *         chunk size makes libcurl abort the transfer.
     */
    static size_t OnWriteData(void* buffer, size_t size, size_t nmemb, void* lpVoid)
    {
        std::string* pOut = static_cast<std::string*>(lpVoid);
        if(NULL == pOut || NULL == buffer)
        {
            return static_cast<size_t>(-1);
        }

        // libcurl expects the byte count back, not the element count.
        const size_t nBytes = size * nmemb;
        pOut->append(static_cast<const char*>(buffer), nBytes);
        return nBytes;
    }
    
    int CHttpClient::Post(const std::string & strUrl, const std::string & strPost, std::string & strResponse)
    {
        CURLcode res;
        CURL* curl = curl_easy_init();
        if(NULL == curl)
        {
            return CURLE_FAILED_INIT;
        }
        if(m_bDebug)
        {
            curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
            curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
        }
        curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
        curl_easy_setopt(curl, CURLOPT_POST, 1);
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, strPost.c_str());
        curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
        curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
        curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
        res = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
        return res;
    }
    
    int CHttpClient::Get(const std::string & strUrl, std::string & strResponse)
    {
        CURLcode res;
        CURL* curl = curl_easy_init();
        if(NULL == curl)
        {
            return CURLE_FAILED_INIT;
        }
        if(m_bDebug)
        {
            curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
            curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
        }
        
        curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
        curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
        /**
        * 当多个线程都使用超时处理的时候,同时主线程中有sleep或是wait等操作。
        * 如果不设置这个选项,libcurl将会发信号打断这个wait从而导致程序退出。
        */
        curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
        curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
        res = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
        return res;
    }
    
    int CHttpClient::Posts(const std::string & strUrl, const std::string & strPost, std::string & strResponse, const char * pCaPath)
    {
        CURLcode res;
        CURL* curl = curl_easy_init();
        if(NULL == curl)
        {
            return CURLE_FAILED_INIT;
        }
        if(m_bDebug)
        {
            curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
            curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
        }
        curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
        curl_easy_setopt(curl, CURLOPT_POST, 1);
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, strPost.c_str());
        curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
        curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
        if(NULL == pCaPath)
        {
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, false);
        }
        else
        {
            //缺省情况就是PEM,所以无需设置,另外支持DER
            //curl_easy_setopt(curl,CURLOPT_SSLCERTTYPE,"PEM");
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, true);
            curl_easy_setopt(curl, CURLOPT_CAINFO, pCaPath);
        }
        curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
        res = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
        return res;
    }
    
    int CHttpClient::Gets(const std::string & strUrl, std::string & strResponse, const char * pCaPath)
    {
        CURLcode res;
        CURL* curl = curl_easy_init();
        if(NULL == curl)
        {
            return CURLE_FAILED_INIT;
        }
        if(m_bDebug)
        {
            curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
            curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
        }
        curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
        curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
        curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
        if(NULL == pCaPath)
        {
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, false);
        }
        else
        {
            curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, true);
            curl_easy_setopt(curl, CURLOPT_CAINFO, pCaPath);
        }
        curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
        res = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
        return res;
    }
    
    ///////////////////////////////////////////////////////////////////////////////////////////////
    
    void CHttpClient::SetDebug(bool bDebug)
    {
        m_bDebug = bDebug;
    }
    
    

    相关文章

      网友评论

      本文标题:刷访问量

      本文链接:https://www.haomeiwen.com/subject/bzisxttx.html