刷访问量

作者: arbboter | 来源:发表于2015-02-13 11:57 被阅读233次

主程序

#include "my_curl.h"
#include "check.h"
#include <string.h>
#include <set>
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <time.h>
using namespace std;

void GetCsdnBlogList(const string& uid, set<string>& setUrl);

/**
 * Entry point: builds a list of URLs and requests randomly chosen ones.
 *
 * With a command-line argument, argv[1] is passed to GetCsdnBlogList as the
 * blog user id and the URLs it collects are used. Without one, a usage hint
 * is printed and the URLs are read from "site.txt", one per line.
 *
 * @return 0 after all requests were issued, -1 when no URL could be obtained.
 */
int main(int argc, char* argv[])
{
    // Number of requests issued per run.
    const int kVisitCount = 20;

    std::set<std::string> setUrl;
    if(argc > 1)
    {
        GetCsdnBlogList(argv[1], setUrl);
    }
    else
    {
        printf("两种工作方式:\n1. %s csdn_bloger_uid 访问CSDN博客列表\n", argv[0]);
        printf("2. 默认访问本地网站列表文件site.txt指定的地址\n");

        // A missing file leaves the stream in a failed state, so the loop
        // below simply does not run.
        std::ifstream ifile("site.txt");
        std::string strLine;
        while(std::getline(ifile, strLine))
        {
            // Drop the '\r' left over from CRLF line endings.
            if(!strLine.empty() && strLine[strLine.size() - 1] == '\r')
            {
                strLine.erase(strLine.size() - 1);
            }
            // Blank lines are not URLs; requesting them would only fail.
            if(!strLine.empty())
            {
                setUrl.insert(strLine);
            }
        }
    }
    if(setUrl.empty())
    {
        return -1;
    }

    // A vector gives the random index access the set cannot provide.
    const std::vector<std::string> vecUrl(setUrl.begin(), setUrl.end());
    setUrl.clear();

    CHttpClient http;
    std::string strResult;

    srand((unsigned int)time(NULL));
    for(int i = 1; i <= kVisitCount; ++i)
    {
        const size_t j = rand() % vecUrl.size();

        // The write callback appends, so reset the buffer to keep it from
        // growing with every request.
        strResult.clear();
        const int nRet = http.Get(vecUrl[j], strResult);
        std::cout << i << "\t" << vecUrl[j] << std::endl;
        if(nRet != 0)
        {
            std::cerr << "request failed (curl code " << nRet << "): " << vecUrl[j] << std::endl;
        }
    }

    return 0;
}

/**
 * Extracts article URLs from the HTML of a blog list page.
 *
 * For every occurrence of "link_title" the text between the following
 * "article/details/" and the next "\">" is taken as the article id (capped at
 * 63 characters) and "http://blog.csdn.net/<uid>/article/details/<id>" is
 * inserted into setUrl. Each id is also echoed to stdout after a '\r'.
 *
 * Scanning stops at the first entry that lacks either the "article/details/"
 * marker or the closing "\">", instead of dereferencing a failed search.
 *
 * @param html    page content to scan
 * @param uid     blog user id used to build the article URLs
 * @param setUrl  receives the article URLs; existing entries are kept
 * @return true when at least one URL not already in setUrl was added
 */
bool GetCsdnBlogArticle(const std::string& html, const std::string& uid, std::set<std::string>& setUrl)
{
    const std::string strKeyWord    = "link_title";
    const std::string strKeyArticle = "article/details/";
    const std::string strKeyEnd     = "\">";
    const std::string strArticle    = "http://blog.csdn.net/" + uid + "/article/details/";
    // Longest article id that is kept; longer ids are truncated.
    const std::string::size_type nMaxIdLen = 63;
    const std::string::size_type nCount = setUrl.size();

    std::string::size_type nPos = 0;
    while((nPos = html.find(strKeyWord, nPos)) != std::string::npos)
    {
        const std::string::size_type nArticle = html.find(strKeyArticle, nPos);
        if(nArticle == std::string::npos)
        {
            break;  // title marker without an article link: nothing more to extract
        }

        const std::string::size_type nIdBeg = nArticle + strKeyArticle.length();
        const std::string::size_type nIdEnd = html.find(strKeyEnd, nIdBeg);
        if(nIdEnd == std::string::npos)
        {
            break;  // unterminated attribute: the id cannot be delimited
        }

        std::string::size_type nIdLen = nIdEnd - nIdBeg;
        if(nIdLen > nMaxIdLen)
        {
            nIdLen = nMaxIdLen;
        }

        const std::string strId = html.substr(nIdBeg, nIdLen);
        setUrl.insert(strArticle + strId);
        std::cout << "\r" << strId;

        // Continue after the id so the same entry is not matched again.
        nPos = nIdBeg;
    }

    return setUrl.size() != nCount;
}

/**
 * Collects the article URLs of a CSDN blog by walking its list pages.
 *
 * Requests "http://blog.csdn.net/<uid>/article/list/<n>" for n = 1, 2, ...
 * and hands each page to GetCsdnBlogArticle. Stops at the first page that
 * contributes no new URL, which also covers a failed request because an empty
 * response yields nothing. Progress and the final count go to stdout.
 *
 * @param uid     blog user id
 * @param setUrl  receives the article URLs
 */
void GetCsdnBlogList(const std::string& uid, std::set<std::string>& setUrl)
{
    CHttpClient http;
    std::string strUrl;
    std::string str;
    char        szPage[16] = {0};
    const std::string strUsr = "http://blog.csdn.net/"+ uid + "/article/list/";

    int i = 0;
    std::cout << "抓到文章 -> \n";
    do
    {
        // Only the page number goes through the fixed buffer, so a long uid
        // can no longer overflow it.
        sprintf_s(szPage, "%d", ++i);
        strUrl = strUsr + szPage;

        // The HTTP client appends to the response string; reset it so that
        // only the current page is parsed.
        str.clear();
        http.Get(strUrl, str);
    }while(GetCsdnBlogArticle(str, uid, setUrl));

    std::cout << "\n共抓取到文章" << setUrl.size() << "篇" << std::endl;
}

封装的curl类

#include "my_curl.h"
#include "curl/curl.h"
#include <string>


#pragma comment(lib, "ws2_32.lib")
#pragma comment(lib, "wldap32.lib")

#if _DEBUG
#pragma comment(lib, "libcurld.lib")
#else
#pragma comment(lib, "libcurl.lib")
#endif

// Constructs a client with debug tracing switched off.
CHttpClient::CHttpClient(void)
    : m_bDebug(false)
{
}

// Destructor: performs no explicit cleanup.
CHttpClient::~CHttpClient(void)
{
}

/**
 * libcurl debug callback: prints header and body traffic to stdout with a
 * tag naming its direction. Informational text (CURLINFO_TEXT) and any other
 * info type are not printed.
 *
 * @param itype  kind of data being reported
 * @param pData  data buffer; NOT NUL-terminated
 * @param size   number of valid bytes in pData
 * @return always 0
 */
static int OnDebug(CURL *, curl_infotype itype, char * pData, size_t size, void *)
{
    const char* pLabel = NULL;
    switch(itype)
    {
    case CURLINFO_HEADER_IN:
        pLabel = "[HEADER_IN]";
        break;
    case CURLINFO_HEADER_OUT:
        pLabel = "[HEADER_OUT]";
        break;
    case CURLINFO_DATA_IN:
        pLabel = "[DATA_IN]";
        break;
    case CURLINFO_DATA_OUT:
        pLabel = "[DATA_OUT]";
        break;
    default:
        // CURLINFO_TEXT and the remaining types are deliberately silent.
        break;
    }

    if(pLabel != NULL && pData != NULL)
    {
        // Write exactly `size` bytes: the buffer has no terminator, so "%s"
        // would read past its end.
        printf("%s", pLabel);
        fwrite(pData, 1, size, stdout);
        printf("\n");
    }
    return 0;
}

/**
 * libcurl write callback: appends the received bytes to a std::string.
 *
 * @param buffer  received data (not NUL-terminated)
 * @param size    size of one element
 * @param nmemb   number of elements; the data is size * nmemb bytes long
 * @param lpVoid  pointer to the std::string that collects the data
 * @return the number of bytes consumed (size * nmemb), or (size_t)-1 when
 *         either pointer is NULL so that the mismatch with the byte count is
 *         reported as a write error
 */
static size_t OnWriteData(void* buffer, size_t size, size_t nmemb, void* lpVoid)
{
    std::string* pOut = static_cast<std::string*>(lpVoid);
    if(NULL == pOut || NULL == buffer)
    {
        return static_cast<size_t>(-1);
    }

    // The callback must report bytes, not elements.
    const size_t nBytes = size * nmemb;
    pOut->append(static_cast<const char*>(buffer), nBytes);
    return nBytes;
}

int CHttpClient::Post(const std::string & strUrl, const std::string & strPost, std::string & strResponse)
{
    CURLcode res;
    CURL* curl = curl_easy_init();
    if(NULL == curl)
    {
        return CURLE_FAILED_INIT;
    }
    if(m_bDebug)
    {
        curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
        curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
    }
    curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
    curl_easy_setopt(curl, CURLOPT_POST, 1);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, strPost.c_str());
    curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
    res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    return res;
}

int CHttpClient::Get(const std::string & strUrl, std::string & strResponse)
{
    CURLcode res;
    CURL* curl = curl_easy_init();
    if(NULL == curl)
    {
        return CURLE_FAILED_INIT;
    }
    if(m_bDebug)
    {
        curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
        curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
    }
    
    curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
    curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
    /**
    * 当多个线程都使用超时处理的时候,同时主线程中有sleep或是wait等操作。
    * 如果不设置这个选项,libcurl将会发信号打断这个wait从而导致程序退出。
    */
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
    res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    return res;
}

int CHttpClient::Posts(const std::string & strUrl, const std::string & strPost, std::string & strResponse, const char * pCaPath)
{
    CURLcode res;
    CURL* curl = curl_easy_init();
    if(NULL == curl)
    {
        return CURLE_FAILED_INIT;
    }
    if(m_bDebug)
    {
        curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
        curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
    }
    curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
    curl_easy_setopt(curl, CURLOPT_POST, 1);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, strPost.c_str());
    curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
    if(NULL == pCaPath)
    {
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, false);
    }
    else
    {
        //缺省情况就是PEM,所以无需设置,另外支持DER
        //curl_easy_setopt(curl,CURLOPT_SSLCERTTYPE,"PEM");
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, true);
        curl_easy_setopt(curl, CURLOPT_CAINFO, pCaPath);
    }
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
    res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    return res;
}

int CHttpClient::Gets(const std::string & strUrl, std::string & strResponse, const char * pCaPath)
{
    CURLcode res;
    CURL* curl = curl_easy_init();
    if(NULL == curl)
    {
        return CURLE_FAILED_INIT;
    }
    if(m_bDebug)
    {
        curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
        curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, OnDebug);
    }
    curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
    curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
    if(NULL == pCaPath)
    {
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, false);
    }
    else
    {
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, true);
        curl_easy_setopt(curl, CURLOPT_CAINFO, pCaPath);
    }
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
    res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    return res;
}

///////////////////////////////////////////////////////////////////////////////////////////////

void CHttpClient::SetDebug(bool bDebug)
{
    m_bDebug = bDebug;
}

相关文章

网友评论

本文标题:刷访问量

本文链接:https://www.haomeiwen.com/subject/bzisxttx.html