美文网首页
VS2022 C++ 使用 curl + libxml2 库获取网站标题

VS2022 C++ 使用 curl + libxml2 库获取网站标题

作者: 金哲虎 | 来源:发表于2022-06-26 17:13 被阅读0次

    开发环境 windows10 64位

    • 首先需要安装 vcpkg 工具,安装方法参见官方文档《开始使用 vcpkg》。
    • 创建项目
    • 打开 CMD,安装 curl 和 libxml2:
    vcpkg install curl:x64-windows
    vcpkg install libxml2:x64-windows
    
    • 安装完成后执行vcpkg integrate install,让vs2022可以识别该lib库。
    • 先填一个坑:用 VS2022 打开 X:\vcpkg\vcpkg\installed\x64-windows\include\iconv.h 文件,按 Ctrl+A 全选代码,然后依次选择菜单“文件”->“iconv.h 另存为”->“编码保存”,选择“UNICODE - 代码页 1200”,确定保存。
      编码保存
    UNICODE 代码页 1200
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <string>
    #include <curl/curl.h>
    #include <libxml/HTMLparser.h>
    
    // Disable compiler warning 4819 for the rest of this translation unit.
    # pragma warning (disable:4819)
    
    //
    //  Case-insensitive string comparison
    //
    //  COMPARE(a, b) evaluates to true when the two C strings compare equal
    //  under _stricmp (when _MSC_VER is defined) or strcasecmp (otherwise).
    //

    #if defined(_MSC_VER)
    #  define COMPARE(a, b) (_stricmp((a), (b)) == 0)
    #else
    #  define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
    #endif
    
    //
    //  libxml callback context structure
    //
    //  State shared by the SAX callbacks: addTitle is switched on by
    //  StartElement and off by EndElement for TITLE elements, and title
    //  collects the text appended while addTitle is set.
    //

    struct Context
    {
        bool addTitle;
        std::string title;

        // Start with title collection switched off and an empty title.
        Context()
            : addTitle(false),
              title()
        {
        }
    };
    
    //
    //  libcurl variables for error strings and returned data
    //
    //  buffer is registered as the CURLOPT_WRITEDATA target in init() and is
    //  later parsed in main(); errorBuffer is registered as the
    //  CURLOPT_ERRORBUFFER and printed in the failure messages.
    //

    static std::string buffer;
    static char errorBuffer[CURL_ERROR_SIZE];
    
    //
    //  libcurl write callback function
    //
    //  Appends the size * nmemb bytes starting at data to *writerData and
    //  returns the number of bytes stored. Returns 0 when writerData is NULL.
    //
    //  The return type is size_t because that is the write-callback signature
    //  libcurl documents for CURLOPT_WRITEFUNCTION; an int return would
    //  truncate the byte count and mismatch the type libcurl calls through.
    //  Per the same documentation, returning a count different from
    //  size * nmemb makes libcurl abort the transfer with a write error.
    //

    static size_t writer(char* data, size_t size, size_t nmemb,
        std::string* writerData)
    {
        if (writerData == NULL)
            return 0;

        const size_t bytes = size * nmemb;
        writerData->append(data, bytes);

        return bytes;
    }
    
    //
    //  libcurl connection initialization
    //
    //  Creates an easy handle in conn and configures it with the error
    //  buffer, the URL, redirect following, the writer callback and the
    //  global buffer as write target.
    //
    //  Returns true when every option was set. On any failure a message is
    //  printed to stderr, the handle (if one was created) is released, conn is
    //  reset to NULL and false is returned, so the caller never receives a
    //  half-configured handle and decides itself how to terminate.
    //

    static bool init(CURL*& conn, char* url)
    {
        conn = curl_easy_init();

        if (conn == NULL) {
            fprintf(stderr, "Failed to create CURL connection\n");
            return false;
        }

        bool ok = false;

        // The error buffer is set first so that later failures can report
        // libcurl's own message; this first step can only report the code.
        const CURLcode code =
            curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);

        if (code != CURLE_OK)
            fprintf(stderr, "Failed to set error buffer [%d]\n",
                static_cast<int>(code));
        else if (curl_easy_setopt(conn, CURLOPT_URL, url) != CURLE_OK)
            fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
        else if (curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK)
            fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
        else if (curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer) != CURLE_OK)
            fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
        else if (curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer) != CURLE_OK)
            fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
        else
            ok = true;

        if (!ok) {
            // Do not leak the handle or hand a stale one back to the caller.
            curl_easy_cleanup(conn);
            conn = NULL;
        }

        return ok;
    }
    
    //
    //  libxml start element callback function
    //
    
    static void StartElement(void* voidContext,
        const xmlChar* name,
        const xmlChar** attributes)
    {
        Context* context = static_cast<Context*>(voidContext);
    
        if (COMPARE(reinterpret_cast<const char*>(name), "TITLE")) {
            context->title = "";
            context->addTitle = true;
        }
        (void)attributes;
    }
    
    //
    //  libxml end element callback function
    //
    
    static void EndElement(void* voidContext,
        const xmlChar* name)
    {
        Context* context = static_cast<Context*>(voidContext);
    
        if (COMPARE(reinterpret_cast<const char*>(name), "TITLE"))
            context->addTitle = false;
    }
    
    //
    //  Text handling helper function
    //
    //  Appends length bytes starting at chars to context->title, but only
    //  while context->addTitle is set; otherwise the text is ignored.
    //

    static void handleCharacters(Context* context,
        const xmlChar* chars,
        int length)
    {
        if (!context->addTitle)
            return;

        const char* text = reinterpret_cast<const char*>(chars);
        context->title.append(text, length);
    }
    
    //
    //  libxml PCDATA callback function
    //
    //  Forwards the received text to handleCharacters, using voidContext as
    //  the Context.
    //

    static void Characters(void* voidContext,
        const xmlChar* chars,
        int length)
    {
        handleCharacters(static_cast<Context*>(voidContext), chars, length);
    }
    
    //
    //  libxml CDATA callback function
    //
    //  Forwards the received block to handleCharacters, using voidContext as
    //  the Context, so CDATA is treated exactly like ordinary text.
    //

    static void cdata(void* voidContext,
        const xmlChar* chars,
        int length)
    {
        handleCharacters(static_cast<Context*>(voidContext), chars, length);
    }
    
    //
    //  libxml SAX callback structure
    //
    //  Positional initializer: only StartElement, EndElement, Characters and
    //  cdata are installed; every other listed slot is NULL. A pointer to this
    //  structure is passed to htmlCreatePushParserCtxt in parseHtml.
    //
    //  NOTE(review): the populated positions (15th, 16th, 18th and 26th
    //  entries) are presumed to line up with the startElement, endElement,
    //  characters and cdataBlock members of libxml2's SAX handler struct --
    //  verify against the installed libxml2 headers before adding, removing
    //  or reordering any entry.
    //
    
    static htmlSAXHandler saxHandler =
    {
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      StartElement,   // 15th entry: element-start callback
      EndElement,     // 16th entry: element-end callback
      NULL,
      Characters,     // 18th entry: text callback
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      NULL,
      cdata,          // 26th entry: CDATA callback
      NULL
    };
    
    //
    //  Parse given (assumed to be) HTML text and return the title
    //
    //  Feeds html through a libxml2 HTML push parser wired to saxHandler and
    //  stores the text collected by the callbacks in title. If the parser
    //  context cannot be created, a message is printed to stderr and title is
    //  left empty.
    //
    //  htmlParseChunk takes its size as an int, so the input is fed in pieces
    //  of at most INT_MAX bytes instead of narrowing html.size() directly.
    //  Inputs up to INT_MAX bytes are still passed in a single call.
    //

    static void parseHtml(const std::string& html,
        std::string& title)
    {
        Context context;

        htmlParserCtxtPtr ctxt = htmlCreatePushParserCtxt(&saxHandler, &context,
            "", 0, "", XML_CHAR_ENCODING_NONE);

        if (ctxt == NULL) {
            fprintf(stderr, "Failed to create HTML parser context\n");
            title.clear();
            return;
        }

        const size_t maxChunk = static_cast<size_t>(INT_MAX);
        const char* cursor = html.c_str();
        size_t remaining = html.size();

        // do/while so that an empty document still gets one (zero-length)
        // non-terminating chunk. Return codes are deliberately not checked:
        // whatever the callbacks collected is still reported.
        do {
            const size_t chunk = remaining < maxChunk ? remaining : maxChunk;

            htmlParseChunk(ctxt, cursor, static_cast<int>(chunk), 0);

            cursor += chunk;
            remaining -= chunk;
        } while (remaining > 0);

        // Empty terminating chunk tells the parser the document is complete.
        htmlParseChunk(ctxt, "", 0, 1);

        htmlFreeParserCtxt(ctxt);

        title = context.title;
    }
    
    int main(int argc, char* argv[])
    {
        CURL* conn = NULL;
        CURLcode code;
        std::string title;
    
        // Ensure one argument is given
    
        if (argc != 2) {
            fprintf(stderr, "Usage: %s <url>\n", argv[0]);
            exit(EXIT_FAILURE);
        }
    
        curl_global_init(CURL_GLOBAL_DEFAULT);
    
        // Initialize CURL connection
    
        if (!init(conn, argv[1])) {
            fprintf(stderr, "Connection initializion failed\n");
            exit(EXIT_FAILURE);
        }
    
        // Retrieve content for the URL
    
        code = curl_easy_perform(conn);
        curl_easy_cleanup(conn);
    
        if (code != CURLE_OK) {
            fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
            exit(EXIT_FAILURE);
        }
    
        // Parse the (assumed) HTML code
        parseHtml(buffer, title);
    
        // Display the extracted title
        printf("Title: %s\n", title.c_str());
    
        return EXIT_SUCCESS;
    }
    

    项目->属性->如下图设置。关闭所有警告

    关闭所有警告
    • 测试结果


      测试结果

    相关文章

      网友评论

          本文标题:VS2022 C++ 使用 curl + libxml2 库获取网站标题

          本文链接:https://www.haomeiwen.com/subject/lhcovrtx.html