美文网首页
libuv http echo服务器性能测试

libuv http echo服务器性能测试

作者: 谭英智 | 来源:发表于2022-04-22 00:28 被阅读0次

    大家对于echo服务器非常熟悉,但是每当被人问起echo服务器的性能是多少,我总是答不上来,就算答上了,那也是瞎蒙的。

    所以想自己做一系列的实验,来看看一个服务器,从零到优,是怎么样一个过程。

    本人知识有限,可能下面的实验很多的办法都并非最佳实践,都是自己脑洞大开的结果。

    下面从运行服务器的配置说起,并使用各种想到的优化,得出每次优化的结果,并最终得出一个比较具体的结论。

    对压测过程不太关心的读者,可以直接看最后的结论。

    服务器配置

    属性
    服务器 笔记本
    CPU Intel Core i7 6核
    RAM 16G
    磁盘 SSD 512G
    环境 Virtual Box 虚拟机 Centos

    下面这张图是笔记本的详细配置

    libuv-computer

    Http服务器代码(使用libuv)

    代码

    http服务器使用C++编写,网络使用libuv做多路复用

    程序代码是从网络上拷贝下来的,基本没改过。

    服务器接收到http请求,直接回一个固定的报文,返回客户端。

    客户端与服务器使用短连接,每一次请求报文,都会产生一次TCP三次握手和四次挥手

    代码如下:

    #include <stdio.h>
    #include <stdlib.h>
    #include <assert.h>
    
    #include "uv.h"
    #include "http_parser.h"
    
    /* One HTTP header stored as a field/value pair. Both members are
     * 1024-byte buffers that on_header_field() / on_header_value() append to
     * with strlncat(). */
    struct header
    {
        char field[1024];
        char value[1024];
    };
    /* Records which header callback ran most recently for a connection, so
     * on_header_field() can tell whether a new header is starting. */
    typedef enum
    {
        NONE = 0, /* no header callback has run yet */
        FIELD,    /* on_header_field() ran last */
        VALUE     /* on_header_value() ran last */
    } head_type;
    /* Per-connection record of the request collected by the parser callbacks. */
    struct message
    {
        int header_num;                /* headers started so far; headers[header_num - 1] is the current one */
        char url[1024];                /* text appended by on_url() (and by on_status()) */
        header headers[15];            /* fixed-capacity header storage */
        head_type last_header_element; /* kind of the most recent header callback */
    };
    
    /* http_parser callbacks. They are defined after main() and are assigned to
     * the matching members of parser_settings at the start of main(). */
    int on_message_begin(http_parser *parser);
    int on_headers_complete(http_parser *parser);
    int on_message_complete(http_parser *parser);
    int on_url(http_parser *parser, const char *at, size_t length);
    int on_status(http_parser *parser, const char *at, size_t length);
    int on_header_field(http_parser *parser, const char *at, size_t length);
    int on_header_value(http_parser *parser, const char *at, size_t length);
    int on_body(http_parser *parser, const char *at, size_t length);
    int on_chunk_header(http_parser *parser);
    int on_chunk_complete(http_parser *parser);
    
    /* strnlen() only became standard with POSIX.1-2008, so a local version is
     * supplied here instead of relying on the C library.
     *
     * Returns the number of bytes that precede the first '\0' in s, examining
     * at most maxlen bytes; returns maxlen when no terminator is found.
     */
    size_t
    strnlen(const char *s, size_t maxlen)
    {
        const char *nul = (const char *)memchr(s, '\0', maxlen);
    
        return nul != NULL ? (size_t)(nul - s) : maxlen;
    }
    
    /* Appends at most n bytes of src to the NUL-terminated string held in dst.
     *
     * dst  - destination buffer
     * len  - total size of the dst buffer in bytes
     * src  - bytes to append; need not be NUL-terminated when n bounds it
     * n    - maximum number of bytes to read from src
     *
     * The result is always NUL-terminated when dst already held a terminator
     * within len bytes; the appended text is truncated to fit. When dst has no
     * terminator within len bytes nothing is written.
     *
     * Returns the length the concatenated string would have had without
     * truncation (strlcat convention), so `result >= len` means truncation.
     *
     * Truncation used to trip an assert() and abort the process. The callers in
     * this file pass bytes received from the network, so an over-long URL or
     * header must not be able to terminate the server; it is truncated instead.
     */
    size_t
    strlncat(char *dst, size_t len, const char *src, size_t n)
    {
        size_t slen = strnlen(src, n);
        size_t dlen = strnlen(dst, len);
    
        if (dlen < len)
        {
            size_t rlen = len - dlen; /* room left, including the terminator */
            size_t ncpy = slen < rlen ? slen : (rlen - 1);
    
            memcpy(dst + dlen, src, ncpy);
            dst[dlen + ncpy] = '\0';
        }
    
        return slen + dlen;
    }
    
    /* Appends the whole NUL-terminated string src to dst, whose buffer is len
     * bytes long. Thin wrapper that calls strlncat() with no limit on how much
     * of src is read. Returns whatever strlncat() returns. */
    size_t
    strlcat(char *dst, const char *src, size_t len)
    {
        const size_t no_limit = (size_t)-1;
    
        return strlncat(dst, len, src, no_limit);
    }
    
    /* Copies at most n bytes of src into dst, a buffer of len bytes.
     *
     * dst  - destination buffer
     * len  - total size of the dst buffer in bytes
     * src  - bytes to copy; need not be NUL-terminated when n bounds it
     * n    - maximum number of bytes to read from src
     *
     * dst is always NUL-terminated when len > 0; the copy is truncated to fit.
     * Nothing is written when len is 0.
     *
     * Returns the length of the source string as bounded by n (strlcpy
     * convention), so `result >= len` means the copy was truncated.
     *
     * Truncation used to trip an assert() and abort the process; it is now
     * reported through the return value only.
     */
    size_t
    strlncpy(char *dst, size_t len, const char *src, size_t n)
    {
        size_t slen = strnlen(src, n);
    
        if (len > 0)
        {
            size_t ncpy = slen < len ? slen : (len - 1);
    
            memcpy(dst, src, ncpy);
            dst[ncpy] = '\0';
        }
    
        return slen;
    }
    
    /* Copies the whole NUL-terminated string src into dst, whose buffer is len
     * bytes long. Thin wrapper that calls strlncpy() with no limit on how much
     * of src is read. Returns whatever strlncpy() returns. */
    size_t
    strlcpy(char *dst, const char *src, size_t len)
    {
        const size_t no_limit = (size_t)-1;
    
        return strlncpy(dst, len, src, no_limit);
    }
    
    /* Fail-fast helper: when r is non-zero, print "<msg>: <libuv error text>"
     * to stderr and terminate the process with exit status 1.
     *
     * Wrapped in do { } while (0) so that `CHECK(x, "y");` is a single statement
     * and is safe inside an unbraced if/else. r is evaluated exactly once. */
    #define CHECK(r, msg)                                                   \
        do                                                                  \
        {                                                                   \
            int check_rc_ = (r);                                            \
            if (check_rc_)                                                  \
            {                                                               \
                fprintf(stderr, "%s: %s\n", (msg), uv_strerror(check_rc_)); \
                exit(1);                                                    \
            }                                                               \
        } while (0)
    
    /* Optional diagnostics. Change `#if 0` to `#if 1` to enable logging.
     *
     * None of the macros carries its own trailing semicolon, and the disabled
     * variants expand to a real (empty) expression, so every use of the form
     * `LOG("x");` is exactly one statement in both configurations and can be
     * used in an unbraced if/else. */
    #if 0
    
    #define UVERR(err, msg) fprintf(stderr, "%s: %s\n", (msg), uv_strerror(err))
    #define LOG(msg) puts(msg)
    #define LOGF(fmt, ...) printf(fmt, ##__VA_ARGS__)
    #define LOG_ERROR(msg) puts(msg)
    
    #else
    
    #define UVERR(err, msg) ((void)0)
    #define LOG(msg) ((void)0)
    #define LOGF(fmt, ...) ((void)0)
    #define LOG_ERROR(msg) ((void)0)
    
    #endif
    
    /* The fixed HTTP response sent for every request. The Content-Length of 12
     * matches the 12-byte body "hello world\n". */
    #define RESPONSE                   \
        "HTTP/1.1 200 OK\r\n"          \
        "Content-Type: text/plain\r\n" \
        "Content-Length: 12\r\n"       \
        "\r\n"                         \
        "hello world\n"
    
    static uv_loop_t *uv_loop;                   /* event loop; assigned from uv_default_loop() in main() */
    static uv_tcp_t server;                      /* listening socket, bound and listened on in main() */
    static http_parser_settings parser_settings; /* callback table filled in by main() */
    
    static uv_buf_t resbuf; /* points at the RESPONSE text; passed to every uv_write() */
    
    static uv_async_t async; // async handle; on_read() calls uv_async_send() on it after every read
    
    /* State for one accepted connection; allocated in on_connect() and freed in
     * on_close(). */
    typedef struct
    {
        uv_tcp_t handle;      /* keep as the first member: on_read() casts a client_t* to uv_handle_t* */
        http_parser parser;   /* parser.data is set to point back at this client_t */
        uv_write_t write_req; /* write request used by on_message_complete() */
        int request_num;      /* value of the global request_num when the connection was accepted */
        message msg;          /* request fields collected by the parser callbacks */
    } client_t;
    
    /* Close callback passed to uv_close(): releases the client_t that
     * handle->data points to. */
    void on_close(uv_handle_t *handle)
    {
        client_t *conn = (client_t *)handle->data;
    
        LOGF("[ %5d ] connection closed\n", conn->request_num);
        free(conn);
    }
    
    /* Allocation callback given to uv_read_start(): hands libuv a freshly
     * malloc'ed buffer of the suggested size. on_read() frees it. */
    void on_alloc(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf)
    {
        char *storage = (char *)malloc(suggested_size);
    
        buf->base = storage;
        buf->len = suggested_size;
        LOGF("on_alloc %p\n", buf->base);
    }
    
    /* Read callback for a client connection.
     *
     * tcp    - the client's stream; tcp->data is the owning client_t
     * nread  - > 0: number of bytes available in buf
     *          = 0: nothing was read this time (not EOF, not an error)
     *          < 0: UV_EOF or a libuv error code
     * buf    - buffer previously handed out by on_alloc(); always freed here
     *
     * Data is fed to the HTTP parser; the response itself is written from
     * on_message_complete(). A parse error or a read error/EOF closes the
     * connection.
     */
    void on_read(uv_stream_t *tcp, ssize_t nread, const uv_buf_t *buf)
    {
        client_t *client = (client_t *)tcp->data;
        uv_handle_t *handle = (uv_handle_t *)&client->handle;
    
        if (nread > 0)
        {
            size_t parsed = http_parser_execute(
                &client->parser, &parser_settings, buf->base, (size_t)nread);
    
            if (parsed < (size_t)nread)
            {
                struct sockaddr_in addr;
                char ipv4addr[64] = "unknown";
                int namelen = sizeof(addr);
    
                /* Only format the address when the lookup succeeded; otherwise
                 * addr is uninitialised and "unknown" is reported. */
                if (uv_tcp_getpeername((const uv_tcp_t *)tcp,
                                       (struct sockaddr *)&addr, &namelen) == 0)
                {
                    uv_ip4_name(&addr, ipv4addr, sizeof(ipv4addr));
                }
    
                LOGF("parse error,peer addr %s\n", ipv4addr);
                printf("parse error,peer addr %s\n", ipv4addr);
    
                /* A parser callback may already have started closing. */
                if (!uv_is_closing(handle))
                {
                    uv_close(handle, on_close);
                }
            }
        }
        else if (nread < 0)
        {
            if (nread != UV_EOF)
            {
                UVERR(nread, uv_err_name(nread));
            }
            if (!uv_is_closing(handle))
            {
                uv_close(handle, on_close);
            }
        }
        /* nread == 0 is deliberately not passed to http_parser_execute(): a
         * zero length tells the parser that the stream ended, which is not
         * what libuv means by a zero-byte read. */
    
        LOGF("free alloc %p\n", buf->base);
        free(buf->base);
    
        uv_async_send(&async);
    }
    
    /* Number of connections accepted so far; incremented in on_connect(). */
    static int request_num = 0;
    /* Value of request_num at the previous fake_job() call, used to print the
     * difference between two calls. */
    static int request_pre = request_num;
    
    /* Connection callback passed to uv_listen(): accepts the pending connection
     * and starts reading from it.
     *
     * server_handle - the listening stream (must be the global `server`)
     * status        - 0 on success, otherwise a libuv error code
     *
     * A client_t is allocated per connection and released by on_close().
     * Failures to set up the connection follow the file's CHECK() policy and
     * terminate the process; a failure to start reading closes just this
     * connection.
     */
    void on_connect(uv_stream_t *server_handle, int status)
    {
        CHECK(status, "connect");
    
        assert((uv_tcp_t *)server_handle == &server);
    
        client_t *client = (client_t *)malloc(sizeof(*client));
        if (client == NULL)
        {
            fprintf(stderr, "connect: out of memory\n");
            exit(1);
        }
    
        /* Zeroing msg leaves header_num at 0, last_header_element at NONE and
         * every string buffer empty, which is what strlncat() needs. */
        memset(&client->msg, 0, sizeof(client->msg));
        client->request_num = request_num;
        ++request_num;
    
        int r = uv_tcp_init(uv_loop, &client->handle);
        CHECK(r, "tcp_init");
        http_parser_init(&client->parser, HTTP_REQUEST);
    
        client->parser.data = client;
        client->handle.data = client;
    
        r = uv_accept(server_handle, (uv_stream_t *)&client->handle);
        CHECK(r, "accept");
    
        r = uv_read_start((uv_stream_t *)&client->handle, on_alloc, on_read);
        if (r)
        {
            /* Without a running read nothing would ever close this handle. */
            UVERR(r, "read_start");
            uv_close((uv_handle_t *)&client->handle, on_close);
        }
    }
    
    /* Timer callback: prints how many connections were accepted since the
     * previous call, then remembers the current total for the next call. */
    void fake_job(uv_timer_t *handle)
    {
        const int accepted_since_last_call = request_num - request_pre;
    
        request_pre = request_num;
        fprintf(stdout, "rate %d\n", accepted_since_last_call);
    }
    
    /* Write-completion callback for the response written in
     * on_message_complete().
     *
     * req    - the finished write request; req->handle is the client's stream
     * status - 0 on success, otherwise a libuv error code
     *
     * The connection is closed whether or not the write succeeded. A failed
     * write (for example because the peer went away, or because the handle was
     * closed while the write was still queued) concerns only this client, so it
     * is logged rather than terminating the whole server.
     */
    void after_write(uv_write_t *req, int status)
    {
        uv_handle_t *handle = (uv_handle_t *)req->handle;
    
        if (status)
        {
            UVERR(status, "write");
        }
    
        /* on_read() may already have started closing this handle. */
        if (!uv_is_closing(handle))
        {
            uv_close(handle, on_close);
        }
    }
    
    // Callback for the `async` handle registered in main(); currently does nothing.
    void on_async_cb(uv_async_t *handle)
    {
        // printf("on_async_cb\n");
    }
    
    /* Entry point: registers the parser callbacks, binds a TCP listener on
     * 0.0.0.0:7070 and runs the default libuv loop.
     *
     * Every setup step is checked with CHECK(), which prints the failing step
     * and exits with status 1. */
    int main()
    {
        int r;
        struct sockaddr_in addr;
        char listen_ip[] = "0.0.0.0";
        int port = 7070;
    
        parser_settings.on_message_begin = on_message_begin;
        parser_settings.on_url = on_url;
        parser_settings.on_status = on_status;
        parser_settings.on_header_field = on_header_field;
        parser_settings.on_header_value = on_header_value;
        parser_settings.on_headers_complete = on_headers_complete;
        parser_settings.on_body = on_body;
        parser_settings.on_message_complete = on_message_complete;
        parser_settings.on_chunk_header = on_chunk_header;
        parser_settings.on_chunk_complete = on_chunk_complete;
    
        /* The response is a string literal shared by all connections. */
        resbuf.base = (char *)(RESPONSE);
        resbuf.len = strlen(RESPONSE);
    
        uv_loop = uv_default_loop();
    
        r = uv_tcp_init(uv_loop, &server);
        CHECK(r, "tcp_init");
    
        r = uv_ip4_addr(listen_ip, port, &addr);
        CHECK(r, "ip4_addr");
    
        r = uv_tcp_bind(&server, (const struct sockaddr *)&addr, 0);
        CHECK(r, "bind");
    
        r = uv_listen((uv_stream_t *)&server, 128, on_connect);
        CHECK(r, "listen");
    
        /* Printed only once the socket really is listening. */
        printf("listening on %s:%d\n", listen_ip, port);
    
        /* Uncomment to have fake_job() print a rate once per second:
         * uv_timer_t fake_job_req;
         * uv_timer_init(uv_loop, &fake_job_req);
         * uv_timer_start(&fake_job_req, fake_job, 1000, 1000);
         */
        r = uv_async_init(uv_loop, &async, on_async_cb);
        CHECK(r, "async_init");
    
        uv_run(uv_loop, UV_RUN_DEFAULT);
        return 0;
    }
    
    /* Parser callback for the start of a message; nothing to do here.
     * Returns 0. */
    int on_message_begin(http_parser *parser)
    {
        (void)parser; /* unused */
        return 0;
    }
    
    /* Parser callback for the end of the header section; only emits a log line
     * when logging is enabled. Returns 0. */
    int on_headers_complete(http_parser *parser)
    {
        client_t *conn = (client_t *)parser->data;
    
        LOGF("[ %5d ] http message parsed\n", conn->request_num);
        (void)conn; /* unused when logging is compiled out */
    
        return 0;
    }
    
    /* Parser callback for the end of a request: queues the fixed response on
     * the client's stream. after_write() closes the connection once the write
     * has finished.
     *
     * If uv_write() itself fails, the connection is closed here instead, so the
     * client_t is not left allocated with nothing scheduled to release it.
     *
     * Returns 0. */
    int on_message_complete(http_parser *parser)
    {
        client_t *client = (client_t *)parser->data;
        uv_handle_t *handle = (uv_handle_t *)&client->handle;
    
        int r = uv_write(
            &client->write_req,
            (uv_stream_t *)&client->handle,
            &resbuf,
            1,
            after_write);
    
        if (r)
        {
            UVERR(r, "write");
            if (!uv_is_closing(handle))
            {
                uv_close(handle, on_close);
            }
        }
    
        return 0;
    }
    
    /* Parser data callback for the request URL: appends the `length` bytes at
     * `at` to the connection's url buffer. Returns 0. */
    int on_url(http_parser *parser, const char *at, size_t length)
    {
        message *req = &((client_t *)parser->data)->msg;
    
        /* sizeof(req->url) is the 1024-byte capacity of the buffer. */
        strlncat(req->url, sizeof(req->url), at, length);
    
        return 0;
    }
    
    /* Parser data callback for a status text: appends the `length` bytes at
     * `at` to the connection's url buffer. Returns 0.
     *
     * NOTE(review): the status text is stored in msg.url, the same buffer
     * on_url() uses; there is no separate status field. Confirm whether that is
     * intended. */
    int on_status(http_parser *parser, const char *at, size_t length)
    {
        message *req = &((client_t *)parser->data)->msg;
    
        /* sizeof(req->url) is the 1024-byte capacity of the buffer. */
        strlncat(req->url, sizeof(req->url), at, length);
    
        return 0;
    }
    
    /* Parser data callback for (a piece of) a header name.
     *
     * A new header slot is started whenever the previous header callback was
     * not also a field callback; otherwise the bytes continue the current
     * header name. The bytes are appended to headers[header_num - 1].field.
     *
     * Returns 0 on success. Returns -1 when the request carries more headers
     * than the headers array can hold; a non-zero return makes http_parser stop
     * with an error instead of this function writing past the array. */
    int on_header_field(http_parser *parser, const char *at, size_t length)
    {
        client_t *client = (client_t *)parser->data;
        message *msg = &client->msg;
        const int capacity = (int)(sizeof(msg->headers) / sizeof(msg->headers[0]));
    
        if (msg->last_header_element != FIELD)
        {
            if (msg->header_num >= capacity)
            {
                return -1;
            }
            ++msg->header_num;
        }
    
        if (msg->header_num < 1 || msg->header_num > capacity)
        {
            return -1;
        }
    
        strlncat(msg->headers[msg->header_num - 1].field,
                 sizeof(msg->headers[0].field), at, length);
        msg->last_header_element = FIELD;
        return 0;
    }
    
    /* Parser data callback for (a piece of) a header value: appends the bytes
     * to headers[header_num - 1].value, i.e. to the header most recently started
     * by on_header_field().
     *
     * Returns 0 on success. Returns -1 (which makes http_parser stop with an
     * error) when header_num does not name a valid slot of the headers array,
     * so the index can never fall outside the array. */
    int on_header_value(http_parser *parser, const char *at, size_t length)
    {
        client_t *client = (client_t *)parser->data;
        message *msg = &client->msg;
        const int capacity = (int)(sizeof(msg->headers) / sizeof(msg->headers[0]));
    
        if (msg->header_num < 1 || msg->header_num > capacity)
        {
            return -1;
        }
    
        strlncat(msg->headers[msg->header_num - 1].value,
                 sizeof(msg->headers[0].value), at, length);
        msg->last_header_element = VALUE;
        return 0;
    }
    
    /* Parser data callback for body bytes; the body is ignored. Returns 0. */
    int on_body(http_parser *parser, const char *at, size_t length)
    {
        (void)parser; /* unused */
        (void)at;     /* unused */
        (void)length; /* unused */
        return 0;
    }
    
    /* Parser callback for a chunk header; nothing to do here. Returns 0. */
    int on_chunk_header(http_parser *parser)
    {
        (void)parser; /* unused */
        return 0;
    }
    
    /* Parser callback for the end of a chunk; nothing to do here. Returns 0. */
    int on_chunk_complete(http_parser *parser)
    {
        (void)parser; /* unused */
        return 0;
    }
    

    Makefile

    # Baseline build: debug info (-g) only, no optimisation flag.
    CC=g++
    SRC = $(wildcard *.cpp)
    OBJS = $(patsubst %.cpp, %.o, $(SRC))
    # FLAG is passed to both the compile rule and the link rule below.
    FLAG = -g -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
    TARGET = a.out
    
    # Link every object into a.out.
    $(TARGET):$(OBJS)
        $(CC) -o $@ $^ $(FLAG)
    
    # Compile one .cpp; -MD -MF also writes a dependency file named .<object>.d
    %.o:%.cpp
        $(CC) -o $@ -c $(FLAG) $< -g -MD -MF .$@.d
    
    clean:
        rm -rf $(TARGET) $(OBJS)
    
    

    压测过程

    不做任何优化

    直接通过上面的Makefile编译程序,并放在虚拟机上运行,虚拟机只分配了一个核,压测工具使用ab,同时跑在同一台虚拟机上。

    结果

    指标
    QPS 7015
    P99 3ms

    下面是使用ab跑出来的结果:

    [root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
    This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
    Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
    Licensed to The Apache Software Foundation, http://www.apache.org/
    
    Benchmarking 127.0.0.1 (be patient)
    Completed 10000 requests
    Completed 20000 requests
    Completed 30000 requests
    Completed 40000 requests
    Completed 50000 requests
    Completed 60000 requests
    Completed 70000 requests
    Completed 80000 requests
    Completed 90000 requests
    Completed 100000 requests
    Finished 100000 requests
    
    
    Server Software:
    Server Hostname:        127.0.0.1
    Server Port:            7070
    
    Document Path:          /
    Document Length:        12 bytes
    
    Concurrency Level:      10
    Time taken for tests:   14.254 seconds
    Complete requests:      100000
    Failed requests:        0
    Write errors:           0
    Total transferred:      7700000 bytes
    HTML transferred:       1200000 bytes
    Requests per second:    7015.45 [#/sec] (mean)
    Time per request:       1.425 [ms] (mean)
    Time per request:       0.143 [ms] (mean, across all concurrent requests)
    Transfer rate:          527.53 [Kbytes/sec] received
    
    Connection Times (ms)
                  min  mean[+/-sd] median   max
    Connect:        0    1   0.3      1      12
    Processing:     0    1   0.3      1      15
    Waiting:        0    0   0.2      0      12
    Total:          1    1   0.5      1      16
    
    Percentage of the requests served within a certain time (ms)
      50%      1
      66%      1
      75%      1
      80%      2
      90%      2
      95%      2
      98%      3
      99%      3
     100%     16 (longest request)
    

    加入编译优化O1

    Makefile

    # Same build as the baseline, with -O1 added to FLAG.
    CC=g++
    SRC = $(wildcard *.cpp)
    OBJS = $(patsubst %.cpp, %.o, $(SRC))
    # FLAG is passed to both the compile rule and the link rule below.
    FLAG = -g -O1 -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
    TARGET = a.out
    
    # Link every object into a.out.
    $(TARGET):$(OBJS)
        $(CC) -o $@ $^ $(FLAG)
    
    # Compile one .cpp; -MD -MF also writes a dependency file named .<object>.d
    %.o:%.cpp
        $(CC) -o $@ -c $(FLAG) $< -g -MD -MF .$@.d
    
    clean:
        rm -rf $(TARGET) $(OBJS)
    
    
    

    结果

    指标
    QPS 6845
    P99 3ms

    下面是使用ab压测出来的详细结果:

    [root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
    This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
    Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
    Licensed to The Apache Software Foundation, http://www.apache.org/
    
    Benchmarking 127.0.0.1 (be patient)
    Completed 10000 requests
    Completed 20000 requests
    Completed 30000 requests
    Completed 40000 requests
    Completed 50000 requests
    Completed 60000 requests
    Completed 70000 requests
    Completed 80000 requests
    Completed 90000 requests
    Completed 100000 requests
    Finished 100000 requests
    
    
    Server Software:
    Server Hostname:        127.0.0.1
    Server Port:            7070
    
    Document Path:          /
    Document Length:        12 bytes
    
    Concurrency Level:      10
    Time taken for tests:   14.609 seconds
    Complete requests:      100000
    Failed requests:        0
    Write errors:           0
    Total transferred:      7700000 bytes
    HTML transferred:       1200000 bytes
    Requests per second:    6845.00 [#/sec] (mean)
    Time per request:       1.461 [ms] (mean)
    Time per request:       0.146 [ms] (mean, across all concurrent requests)
    Transfer rate:          514.71 [Kbytes/sec] received
    
    Connection Times (ms)
                  min  mean[+/-sd] median   max
    Connect:        0    1   0.3      1      11
    Processing:     0    1   0.3      1      11
    Waiting:        0    0   0.2      0       8
    Total:          1    1   0.5      1      13
    
    Percentage of the requests served within a certain time (ms)
      50%      1
      66%      1
      75%      2
      80%      2
      90%      2
      95%      2
      98%      3
      99%      3
     100%     13 (longest request)
    
    

    编译优化O2

    Makefile

    # Same build as the baseline, with -O2 added to FLAG.
    CC=g++
    SRC = $(wildcard *.cpp)
    OBJS = $(patsubst %.cpp, %.o, $(SRC))
    # FLAG is passed to both the compile rule and the link rule below.
    FLAG = -g -O2 -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
    TARGET = a.out
    
    # Link every object into a.out.
    $(TARGET):$(OBJS)
        $(CC) -o $@ $^ $(FLAG)
    
    # Compile one .cpp; -MD -MF also writes a dependency file named .<object>.d
    %.o:%.cpp
        $(CC) -o $@ -c $(FLAG) $< -g -MD -MF .$@.d
    
    clean:
        rm -rf $(TARGET) $(OBJS)
    

    结果

    指标
    QPS 7618
    P99 2ms

    进行了O2优化,程序的性能并没有得到很大的提升

    下面是使用ab压测工具输出的结果

    [root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
    This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
    Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
    Licensed to The Apache Software Foundation, http://www.apache.org/
    
    Benchmarking 127.0.0.1 (be patient)
    Completed 10000 requests
    Completed 20000 requests
    Completed 30000 requests
    Completed 40000 requests
    Completed 50000 requests
    Completed 60000 requests
    Completed 70000 requests
    Completed 80000 requests
    Completed 90000 requests
    Completed 100000 requests
    Finished 100000 requests
    
    
    Server Software:
    Server Hostname:        127.0.0.1
    Server Port:            7070
    
    Document Path:          /
    Document Length:        12 bytes
    
    Concurrency Level:      10
    Time taken for tests:   13.127 seconds
    Complete requests:      100000
    Failed requests:        0
    Write errors:           0
    Total transferred:      7700000 bytes
    HTML transferred:       1200000 bytes
    Requests per second:    7618.07 [#/sec] (mean)
    Time per request:       1.313 [ms] (mean)
    Time per request:       0.131 [ms] (mean, across all concurrent requests)
    Transfer rate:          572.84 [Kbytes/sec] received
    
    Connection Times (ms)
                  min  mean[+/-sd] median   max
    Connect:        0    1   0.2      1      16
    Processing:     0    1   0.3      1      16
    Waiting:        0    0   0.2      0      16
    Total:          1    1   0.3      1      17
    
    Percentage of the requests served within a certain time (ms)
      50%      1
      66%      1
      75%      1
      80%      1
      90%      2
      95%      2
      98%      2
      99%      2
     100%     17 (longest request)
    

    编译优化O3

    Makefile

    # Same build as the baseline, with -O3 added to FLAG (this section measures
    # the -O3 build, so the flag must not be left at -O2).
    CC=g++
    SRC = $(wildcard *.cpp)
    OBJS = $(patsubst %.cpp, %.o, $(SRC))
    # FLAG is passed to both the compile rule and the link rule below.
    FLAG = -g -O3 -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
    TARGET = a.out
    
    # Link every object into a.out.
    $(TARGET):$(OBJS)
        $(CC) -o $@ $^ $(FLAG)
    
    # Compile one .cpp; -MD -MF also writes a dependency file named .<object>.d
    %.o:%.cpp
        $(CC) -o $@ -c $(FLAG) $< -g -MD -MF .$@.d
    
    clean:
        rm -rf $(TARGET) $(OBJS)
    

    结果

    指标
    QPS 7498
    P99 2ms

    进行了O3优化,程序的性能并没有得到很大的提升

    下面是使用ab压测工具输出的报告:

    [root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
    This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
    Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
    Licensed to The Apache Software Foundation, http://www.apache.org/
    
    Benchmarking 127.0.0.1 (be patient)
    Completed 10000 requests
    Completed 20000 requests
    Completed 30000 requests
    Completed 40000 requests
    Completed 50000 requests
    Completed 60000 requests
    Completed 70000 requests
    Completed 80000 requests
    Completed 90000 requests
    Completed 100000 requests
    Finished 100000 requests
    
    
    Server Software:
    Server Hostname:        127.0.0.1
    Server Port:            7070
    
    Document Path:          /
    Document Length:        12 bytes
    
    Concurrency Level:      10
    Time taken for tests:   13.336 seconds
    Complete requests:      100000
    Failed requests:        0
    Write errors:           0
    Total transferred:      7700000 bytes
    HTML transferred:       1200000 bytes
    Requests per second:    7498.41 [#/sec] (mean)
    Time per request:       1.334 [ms] (mean)
    Time per request:       0.133 [ms] (mean, across all concurrent requests)
    Transfer rate:          563.84 [Kbytes/sec] received
    
    Connection Times (ms)
                  min  mean[+/-sd] median   max
    Connect:        0    1   0.3      1      11
    Processing:     0    1   0.3      1      12
    Waiting:        0    0   0.2      0      11
    Total:          1    1   0.4      1      13
    
    Percentage of the requests served within a certain time (ms)
      50%      1
      66%      1
      75%      1
      80%      1
      90%      2
      95%      2
      98%      2
      99%      2
     100%     13 (longest request)
    

    编译优化O2性能分析

    perf获取采样数据

    指标
    上下文切换 8658
    缺页中断 177733

    下面是perf输出的详细结果:

    [root@localhost uv_server]# perf stat ./a.out
    listening on 0.0.0.0:7070
    ^C./a.out: Interrupt
    
     Performance counter stats for './a.out':
    
             17,153.28 msec task-clock                #    0.680 CPUs utilized      
                 8,658      context-switches          #    0.505 K/sec              
                     5      cpu-migrations            #    0.000 K/sec              
               177,733      page-faults               #    0.010 M/sec              
       <not supported>      cycles                                                  
       <not supported>      instructions                                            
       <not supported>      branches                                                
       <not supported>      branch-misses                                           
    
          25.230652210 seconds time elapsed
    
           0.363597000 seconds user
          16.794232000 seconds sys
    
    

    启动虚拟机多核

    由于上面的压测都是使用了单核,而ab和http服务器必然会不断地抢夺cpu资源

    因此,决定把虚拟机的CPU核数调成两个,并再对O2编译优化的版本进行压测

    结果

    指标
    QPS 9670
    P99 2ms

    使用了多核,http服务器的性能得到了一次飞跃

    下面是使用ab压测工具输出的报告:

    [root@localhost ~]# ab -n100000 -c 10 http://127.0.0.1:7070/
    This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
    Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
    Licensed to The Apache Software Foundation, http://www.apache.org/
    
    Benchmarking 127.0.0.1 (be patient)
    Completed 10000 requests
    Completed 20000 requests
    Completed 30000 requests
    Completed 40000 requests
    Completed 50000 requests
    Completed 60000 requests
    Completed 70000 requests
    Completed 80000 requests
    Completed 90000 requests
    Completed 100000 requests
    Finished 100000 requests
    
    
    Server Software:
    Server Hostname:        127.0.0.1
    Server Port:            7070
    
    Document Path:          /
    Document Length:        12 bytes
    
    Concurrency Level:      10
    Time taken for tests:   10.341 seconds
    Complete requests:      100000
    Failed requests:        0
    Write errors:           0
    Total transferred:      7700000 bytes
    HTML transferred:       1200000 bytes
    Requests per second:    9670.08 [#/sec] (mean)
    Time per request:       1.034 [ms] (mean)
    Time per request:       0.103 [ms] (mean, across all concurrent requests)
    Transfer rate:          727.14 [Kbytes/sec] received
    
    Connection Times (ms)
                  min  mean[+/-sd] median   max
    Connect:        0    0   0.2      0       3
    Processing:     0    1   0.2      1       3
    Waiting:        0    0   0.2      0       3
    Total:          0    1   0.2      1       4
    
    Percentage of the requests served within a certain time (ms)
      50%      1
      66%      1
      75%      1
      80%      1
      90%      1
      95%      1
      98%      2
      99%      2
     100%      4 (longest request)
    

    启用tcmalloc

    使用perf生成的报告可以看到,使用了O2优化后的程序,缺页次数高达177,733

    为了减少缺页中断,使用开源的内存管理lib tcmalloc来做程序的进一步优化。

    结果

    把tcmalloc加入了程序之后,性能得到了另一次的飞跃

    指标
    QPS 11679
    P99 2ms

    下面是使用ab压测工具输出的结果:

    This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
    Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
    Licensed to The Apache Software Foundation, http://www.apache.org/
    
    Benchmarking 127.0.0.1 (be patient)
    
    
    Server Software:
    Server Hostname:        127.0.0.1
    Server Port:            7070
    
    Document Path:          /
    Document Length:        12 bytes
    
    Concurrency Level:      10
    Time taken for tests:   8.562 seconds
    Complete requests:      100000
    Failed requests:        0
    Write errors:           0
    Total transferred:      7700000 bytes
    HTML transferred:       1200000 bytes
    Requests per second:    11679.12 [#/sec] (mean)
    Time per request:       0.856 [ms] (mean)
    Time per request:       0.086 [ms] (mean, across all concurrent requests)
    Transfer rate:          878.22 [Kbytes/sec] received
    
    Connection Times (ms)
                  min  mean[+/-sd] median   max
    Connect:        0    0   0.2      0       4
    Processing:     0    0   0.2      0       6
    Waiting:        0    0   0.2      0       6
    Total:          0    1   0.2      1       8
    
    Percentage of the requests served within a certain time (ms)
      50%      1
      66%      1
      75%      1
      80%      1
      90%      1
      95%      1
      98%      2
      99%      2
     100%      8 (longest request)
    

    查看缺页次数

    指标
    缺页中断 5016

    可以看到缺页次数大大的降低

    下面是perf输出的报告:

    [root@localhost uv_server]# perf stat ./a.out
    listening on 0.0.0.0:7070
    ^C./a.out: Interrupt
    
     Performance counter stats for './a.out':
    
             15,851.09 msec task-clock                #    0.503 CPUs utilized      
                 9,408      context-switches          #    0.594 K/sec              
                    15      cpu-migrations            #    0.001 K/sec              
                 5,016      page-faults               #    0.316 K/sec              
       <not supported>      cycles                                                  
       <not supported>      instructions                                            
       <not supported>      branches                                                
       <not supported>      branch-misses                                           
    
          31.499659541 seconds time elapsed
    
           0.948398000 seconds user
          14.871534000 seconds sys
    
    

    火焰图

    perf record -g -F 99 ./a.out
    
    perf script -i perf.data >out.perf
    ~/FlameGraph/stackcollapse-perf.pl out.perf > out.floded
    ~/FlameGraph/flamegraph.pl out.floded > cp.svg
    
    libuv-perf-o2-pic

    结论

    1. 编译优化,并不能很好的提升程序的性能,我猜测是I7 cpu的功能强大,有强大的推测执行和乱序执行能力,对编译优化的依赖越来越低导致的。另外从perf的输出看,用户态耗时只有0.36秒,而内核态耗时16.79秒,程序绝大部分时间都花在系统调用上,编译优化能影响到的用户态代码占比很小。但这并不是说编译优化不重要,需要看服务器是在什么体系结构下运行。
    2. 压测客户端和服务器应该隔离运行,在同时运行在一个核时,它们会不断的抢资源,但是由于本人资源有限,所以只能在虚拟机里玩玩,但也需要配置多核来运行。
    3. 服务器离不开内存分配和释放,如果裸用系统的内存分配,会给性能带来一定的下降,这时可以通过使用优秀的内存管理库来优化代码,这无论在什么时候,都是适用的
    4. 程序运行在虚拟机上,代表一次系统调用会发生四次上下文切换,但由于本人懒惰,所以没有继续在原生的机器上再压测服务器,有兴趣的读者,可以试试在裸机上跑,我敢肯定,性能会有不小的飞跃
    5. 从上面的结果看,有一点我是有怀疑态度的,就是延迟基本是毫秒级的,这跟我对网络性能的认知有很大的差距,网络的性能应该是微秒级别的,我现在只能猜测是虚拟机或者网卡的问题。有兴趣的读者可以再进一步论证。
    6. http服务器是使用TCP短连接的,但一般的echo服务器都是长连接的,这可以省去大量的握手和挥手的rtt时间,如果把程序改成长连接,那么性能也会有很大的飞跃。但同时也因为本人懒惰,所以先写到这里。

    相关文章

      网友评论

          本文标题:libuv http echo服务器性能测试

          本文链接:https://www.haomeiwen.com/subject/zvrgertx.html