libuv http echo服务器性能测试

作者: 谭英智 | 来源:发表于2022-04-22 00:28 被阅读0次

libuv http echo服务器性能测试
libuv echo server性能压测
性能测试 | jmeter(上)_基本功能概述
Netty EventLoop与IO模型整理
HTTP服务器状态码解析，秒杀一切面试问题
http_load使用详解
性能测试概览Ⅳ
性能测试常用的测试方法
压测难，难于上青天，80%的直播应用都败在了这里
网站监控—http_load

大家对于echo服务器都非常熟悉，但是每当有人问起我echo服务器的性能是多少，我总是答不上来，就算答上了，那也是瞎蒙的。

所以想自己做一系列的实验，来看看一个服务器，从零到优，是怎么样一个过程。

本人知识有限，可能下面的实验很多的办法都并非最佳实践，都是自己脑洞大开的结果。

下面从运行服务器的配置说起，并使用各种想到的优化，得出每次优化的结果，并最终得出一个比较具体的结论。

如果对压测过程不太关心的读者，可以直接看最后的结论。

服务器配置

项	属性
服务器	笔记本
CPU	Intel Core i7 6核
RAM	16G
磁盘	SSD 512G
环境	VirtualBox 虚拟机 CentOS

下面这张图是笔记本的详细配置

libuv-computer

Http服务器代码（使用libuv）

代码

http服务器使用C++编写，网络使用libuv做多路复用

程序代码是从网络上拷贝下来的，基本没改过。

服务器接收到http请求，直接回一个固定的报文，返回客户端。

客户端与服务器使用短连接，每一次请求都会产生一次TCP三次握手和四次挥手。

代码如下：

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "uv.h"
#include "http_parser.h"

/* One HTTP header as collected by the parser callbacks: the header name and
 * its value, each kept as a NUL-terminated string in a 1024-byte buffer. */
struct header {
    char field[1024]; /* header name */
    char value[1024]; /* header value */
};
/* Kind of header callback that fired most recently for a message. */
typedef enum {
    NONE = 0, /* no header callback seen yet */
    FIELD,    /* last callback delivered (part of) a header name */
    VALUE     /* last callback delivered (part of) a header value */
} head_type;
/* Per-connection record of the request that is being parsed. */
struct message {
    int header_num;                /* number of entries of headers[] in use */
    char url[1024];                /* request URL, accumulated by on_url() */
    header headers[15];            /* storage for the parsed headers */
    head_type last_header_element; /* kind of the most recent header callback */
};

/* http_parser callbacks. They are installed into parser_settings by main()
 * and defined after it, hence the forward declarations. */
int on_message_begin(http_parser *parser);
int on_headers_complete(http_parser *parser);
int on_message_complete(http_parser *parser);
int on_url(http_parser *parser, const char *at, size_t length);
int on_status(http_parser *parser, const char *at, size_t length);
int on_header_field(http_parser *parser, const char *at, size_t length);
int on_header_value(http_parser *parser, const char *at, size_t length);
int on_body(http_parser *parser, const char *at, size_t length);
int on_chunk_header(http_parser *parser);
int on_chunk_complete(http_parser *parser);

/* strnlen() is a POSIX.2008 addition. Can't rely on it being available so
 * define it ourselves.
 *
 * Returns the length of s, examining at most maxlen bytes; when no NUL byte
 * is found in that range the result is maxlen.
 */
size_t
strnlen(const char *s, size_t maxlen)
{
    const char *p;

    p = (const char *)memchr(s, '\0', maxlen);
    if (p == NULL)
        return maxlen;

    return (size_t)(p - s);
}

/* Append at most n bytes of src to the NUL-terminated string held in dst,
 * whose buffer is len bytes long in total.
 *
 * Input that does not fit is truncated and the result stays NUL-terminated
 * (dst is left untouched if it has no terminator within len bytes). The
 * return value is the length the untruncated result would have had, so a
 * return value >= len tells the caller that truncation happened.
 *
 * Truncation is deliberately not asserted on: the callers pass bytes that
 * come from the network, and an over-long URL or header must not abort the
 * process.
 */
size_t
strlncat(char *dst, size_t len, const char *src, size_t n)
{
    size_t slen = strnlen(src, n);
    size_t dlen = strnlen(dst, len);

    if (dlen < len)
    {
        size_t rlen = len - dlen; /* room left, including the terminator */
        size_t ncpy = slen < rlen ? slen : (rlen - 1);

        memcpy(dst + dlen, src, ncpy);
        dst[dlen + ncpy] = '\0';
    }

    return slen + dlen;
}

/* Append the NUL-terminated string src to dst (buffer size len).
 * Same truncation and return-value rules as strlncat(). */
size_t
strlcat(char *dst, const char *src, size_t len)
{
    /* Pass the real source length rather than SIZE_MAX so that the bounded
     * scan never receives a range larger than the object it scans. */
    return strlncat(dst, len, src, strlen(src));
}

/* Copy at most n bytes of src into dst, whose buffer is len bytes long.
 *
 * The copy is truncated to fit and NUL-terminated whenever len > 0. The
 * return value is the length of the source (bounded by n); a return value
 * >= len means the copy was truncated. As with strlncat(), truncation does
 * not abort the process.
 */
size_t
strlncpy(char *dst, size_t len, const char *src, size_t n)
{
    size_t slen = strnlen(src, n);

    if (len > 0)
    {
        size_t ncpy = slen < len ? slen : (len - 1);

        memcpy(dst, src, ncpy);
        dst[ncpy] = '\0';
    }

    return slen;
}

/* Copy the NUL-terminated string src into dst (buffer size len).
 * Same truncation and return-value rules as strlncpy(). */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
    return strlncpy(dst, len, src, strlen(src));
}

/* Terminate the process with a diagnostic when a libuv call failed.
 *
 *   r   - libuv result code; any non-zero value counts as failure.
 *   msg - short label printed in front of uv_strerror(r).
 *
 * The body is wrapped in do { } while (0) so that `CHECK(...);` is exactly
 * one statement and stays correct inside an unbraced if/else. The result
 * code is copied into a local so that r is evaluated only once.
 */
#define CHECK(r, msg)                                                   \
    do                                                                  \
    {                                                                   \
        int check_rc_ = (r);                                            \
        if (check_rc_)                                                  \
        {                                                               \
            fprintf(stderr, "%s: %s\n", (msg), uv_strerror(check_rc_)); \
            exit(1);                                                    \
        }                                                               \
    } while (0)

/* Debug logging switches: change "#if 0" to "#if 1" to turn the output on.
 *
 * None of the macros carries its own trailing semicolon, and the disabled
 * variants expand to a no-op expression instead of to nothing. A use such as
 * `LOG("x");` is therefore exactly one statement in both configurations and
 * can safely be the body of an unbraced if/else.
 */
#if 0

#define UVERR(err, msg) fprintf(stderr, "%s: %s\n", msg, uv_strerror(err))
#define LOG(msg) puts(msg)
#define LOGF(fmt, ...) printf(fmt, ##__VA_ARGS__)
#define LOG_ERROR(msg) puts(msg)

#else

#define UVERR(err, msg) ((void)0)
#define LOG(msg) ((void)0)
#define LOGF(fmt, ...) ((void)0)
#define LOG_ERROR(msg) ((void)0)

#endif

/* Canned HTTP reply: status line and headers, an empty line, then a 12-byte
 * plain-text body (the size announced by the Content-Length header). */
#define RESPONSE_HEAD              \
    "HTTP/1.1 200 OK\r\n"          \
    "Content-Type: text/plain\r\n" \
    "Content-Length: 12\r\n"       \
    "\r\n"
#define RESPONSE_BODY "hello world\n"
#define RESPONSE RESPONSE_HEAD RESPONSE_BODY

/* Event loop used for every handle in this program; assigned in main(). */
static uv_loop_t *uv_loop;
/* Listening TCP handle. */
static uv_tcp_t server;
/* Callback table passed to http_parser_execute(); filled in by main(). */
static http_parser_settings parser_settings;

/* Buffer descriptor for the canned RESPONSE, written to each client. */
static uv_buf_t resbuf;

static uv_async_t async; // async handle; on_read() signals it after every read

/* State of one client connection. Allocated in on_connect() and released in
 * on_close(); both handle.data and parser.data point back at this struct. */
typedef struct {
    uv_tcp_t handle;      /* libuv TCP handle of the connection (first member) */
    http_parser parser;   /* request parser for this connection */
    uv_write_t write_req; /* the single write request used for the response */
    int request_num;      /* value of the global counter when accepted */
    message msg;          /* URL and headers collected while parsing */
} client_t;

/* libuv close callback: releases the client_t that handle->data points to. */
void on_close(uv_handle_t *handle)
{
    client_t *conn = (client_t *)handle->data;

    LOGF("[ %5d ] connection closed\n", conn->request_num);

    free(conn);
}

/* libuv allocation callback: hands libuv a receive buffer of suggested_size
 * bytes. The buffer is released at the end of on_read().
 *
 * If malloc() fails, a zero-length buffer is reported instead of a NULL base
 * paired with a non-zero length; libuv then delivers UV_ENOBUFS to the read
 * callback, which closes the connection.
 */
void on_alloc(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf)
{
    (void)handle;

    buf->base = (char *)malloc(suggested_size);
    buf->len = (buf->base != NULL) ? suggested_size : 0;
    LOGF("on_alloc %p\n", (void *)buf->base);
}

/* libuv read callback: feeds received bytes to the HTTP parser.
 *
 *   nread > 0  - nread bytes are available in buf; they are parsed, and the
 *                connection is closed if the parser stops early.
 *   nread == 0 - no data this time (libuv's equivalent of EAGAIN). Nothing is
 *                parsed: a zero-length http_parser_execute() call would tell
 *                the parser that the stream reached EOF.
 *   nread < 0  - EOF or error; the connection is closed.
 *
 * The buffer from on_alloc() is always freed here. uv_close() is issued only
 * when the handle is not already closing, because closing a handle twice is
 * not allowed by libuv.
 */
void on_read(uv_stream_t *tcp, ssize_t nread, const uv_buf_t *buf)
{
    client_t *client = (client_t *)tcp->data;
    uv_handle_t *handle = (uv_handle_t *)&client->handle;

    if (nread > 0)
    {
        size_t parsed = http_parser_execute(
            &client->parser, &parser_settings, buf->base, (size_t)nread);

        if (parsed < (size_t)nread)
        {
            struct sockaddr_in addr;
            char ipv4addr[64] = "";
            int namelen = sizeof(addr);

            /* Format the peer address only if it could be obtained; otherwise
             * the empty string is printed. */
            if (uv_tcp_getpeername(&client->handle, (struct sockaddr *)&addr, &namelen) == 0)
            {
                uv_ip4_name(&addr, ipv4addr, sizeof(ipv4addr));
            }

            LOGF("parse error,peer addr %s\n", ipv4addr);
            printf("parse error,peer addr %s\n", ipv4addr);

            if (!uv_is_closing(handle))
            {
                uv_close(handle, on_close);
            }
        }
    }
    else if (nread < 0)
    {
        if (nread != UV_EOF)
        {
            UVERR(nread, uv_err_name(nread));
        }

        if (!uv_is_closing(handle))
        {
            uv_close(handle, on_close);
        }
    }

    LOGF("free alloc %p\n", (void *)buf->base);
    free(buf->base);

    uv_async_send(&async);
}

/* Number of connections seen so far; incremented in on_connect(). */
static int request_num = 0;
/* Value of request_num at the previous fake_job() call. */
static int request_pre = request_num;

/* libuv connection callback: accepts one client and starts reading from it.
 *
 * A failed connection attempt (non-zero status), a failed uv_accept() or a
 * failed uv_read_start() affects only that connection: it is reported on
 * stderr and, where a handle already exists, the handle is closed so that
 * on_close() frees the client. Running out of memory is still fatal, because
 * without a client object the pending connection cannot be accepted.
 */
void on_connect(uv_stream_t *server_handle, int status)
{
    int r;
    client_t *client;

    if (status != 0)
    {
        fprintf(stderr, "connect: %s\n", uv_strerror(status));
        return;
    }

    assert((uv_tcp_t *)server_handle == &server);

    /* calloc() zeroes the whole object, so msg starts out empty
     * (header_num == 0, last_header_element == NONE, empty strings). */
    client = (client_t *)calloc(1, sizeof(*client));
    if (client == NULL)
    {
        fprintf(stderr, "connect: out of memory\n");
        exit(1);
    }
    client->request_num = request_num;
    ++request_num;

    r = uv_tcp_init(uv_loop, &client->handle);
    if (r != 0)
    {
        /* The handle was never initialised, so it must not go through
         * uv_close(); without a handle the connection cannot be accepted. */
        free(client);
        CHECK(r, "tcp_init");
    }
    http_parser_init(&client->parser, HTTP_REQUEST);

    client->parser.data = client;
    client->handle.data = client;

    r = uv_accept(server_handle, (uv_stream_t *)&client->handle);
    if (r == 0)
    {
        r = uv_read_start((uv_stream_t *)&client->handle, on_alloc, on_read);
    }
    if (r != 0)
    {
        fprintf(stderr, "accept: %s\n", uv_strerror(r));
        uv_close((uv_handle_t *)&client->handle, on_close);
    }
}

/* Timer callback: prints how much request_num grew since the previous call,
 * then remembers the current value for the next one. */
void fake_job(uv_timer_t *handle)
{
    int delta = request_num - request_pre;

    (void)handle;
    fprintf(stdout, "rate %d\n", delta);
    request_pre = request_num;
}

/* libuv write callback: the response has been written (or the write failed),
 * so the connection is closed.
 *
 * A failed write is not fatal for the server: a client that disconnects
 * early, or a write cancelled because the handle was already being closed,
 * must only end that one connection. The handle is closed only if no close
 * is in progress yet, since libuv does not allow closing a handle twice.
 */
void after_write(uv_write_t *req, int status)
{
    uv_handle_t *handle = (uv_handle_t *)req->handle;

    if (status != 0)
    {
        UVERR(status, "write");
    }

    if (!uv_is_closing(handle))
    {
        uv_close(handle, on_close);
    }
}

// Callback of the async handle; currently a no-op.
void on_async_cb(uv_async_t *handle)
{
    (void)handle;
}

/* Program entry point: installs the parser callbacks, prepares the canned
 * response buffer, starts listening on 0.0.0.0:7070 and runs the event loop.
 *
 * Every set-up call is checked, each with its own label. In particular the
 * result of uv_listen() is checked, because libuv may report a port that is
 * already in use from uv_listen() rather than from uv_tcp_bind(); without
 * the check the program would print "listening" and serve nothing.
 */
int main()
{
    int r;
    struct sockaddr_in addr;
    char listen_ip[] = "0.0.0.0";
    int port = 7070;

    parser_settings.on_message_begin = on_message_begin;
    parser_settings.on_url = on_url;
    parser_settings.on_status = on_status;
    parser_settings.on_header_field = on_header_field;
    parser_settings.on_header_value = on_header_value;
    parser_settings.on_headers_complete = on_headers_complete;
    parser_settings.on_body = on_body;
    parser_settings.on_message_complete = on_message_complete;
    parser_settings.on_chunk_header = on_chunk_header;
    parser_settings.on_chunk_complete = on_chunk_complete;

    resbuf.base = (char *)(RESPONSE);
    resbuf.len = strlen(RESPONSE);

    uv_loop = uv_default_loop();

    r = uv_tcp_init(uv_loop, &server);
    CHECK(r, "tcp_init");

    r = uv_ip4_addr(listen_ip, port, &addr);
    CHECK(r, "ip4_addr");

    r = uv_tcp_bind(&server, (const struct sockaddr *)&addr, 0);
    CHECK(r, "bind");

    r = uv_listen((uv_stream_t *)&server, 128, on_connect);
    CHECK(r, "listen");

    printf("listening on %s:%d\n", listen_ip, port);

    // uv_timer_t fake_job_req;
    // uv_timer_init(uv_loop, &fake_job_req);
    // uv_timer_start(&fake_job_req, fake_job, 1000, 1000);
    r = uv_async_init(uv_loop, &async, on_async_cb);
    CHECK(r, "async_init");

    uv_run(uv_loop, UV_RUN_DEFAULT);
    return 0;
}

/* http_parser callback for the start of a message; does nothing and
 * returns 0. */
int on_message_begin(http_parser *parser)
{
    (void)parser;
    return 0;
}

/* http_parser callback fired once all headers were parsed; only emits a
 * debug log line (when logging is enabled) and returns 0. */
int on_headers_complete(http_parser *parser)
{
    client_t *conn = (client_t *)parser->data;

    (void)conn; /* only referenced by LOGF, which may expand to nothing */
    LOGF("[ %5d ] http message parsed\n", conn->request_num);

    return 0;
}

/* http_parser callback fired when a whole request was parsed: queues the
 * canned response on the connection. after_write() closes the connection
 * once the write finished.
 *
 * If uv_write() cannot even queue the request, after_write() will never run,
 * so the connection is closed here instead (unless it is already closing).
 *
 * NOTE(review): each client has a single write_req, so a second complete
 * request arriving on the same connection before the first write finished
 * would reuse a request that is still in flight - confirm whether pipelined
 * clients need to be supported.
 *
 * Always returns 0.
 */
int on_message_complete(http_parser *parser)
{
    client_t *client = (client_t *)parser->data;
    uv_handle_t *handle = (uv_handle_t *)&client->handle;
    int r;

    r = uv_write(
        &client->write_req,
        (uv_stream_t *)&client->handle,
        &resbuf,
        1,
        after_write);
    if (r != 0)
    {
        UVERR(r, "write");
        if (!uv_is_closing(handle))
        {
            uv_close(handle, on_close);
        }
    }

    return 0;
}

/* http_parser callback delivering (a piece of) the request URL; the bytes
 * are appended to the connection's url buffer. Returns 0. */
int on_url(http_parser *parser, const char *at, size_t length)
{
    message *msg = &((client_t *)parser->data)->msg;

    strlncat(msg->url, sizeof(msg->url), at, length);

    return 0;
}

/* http_parser callback delivering (a piece of) a response status text; the
 * bytes are appended to the connection's url buffer. Returns 0.
 *
 * NOTE(review): this writes into the same buffer that on_url() fills, which
 * looks like a copy-paste from on_url() - confirm whether a separate status
 * field was intended.
 */
int on_status(http_parser *parser, const char *at, size_t length)
{
    message *msg = &((client_t *)parser->data)->msg;

    strlncat(msg->url, sizeof(msg->url), at, length);

    return 0;
}

/* http_parser callback delivering (a piece of) a header name.
 *
 * A header name may arrive in several pieces. A new slot in msg.headers is
 * taken only when the previous header callback was not also a name piece;
 * otherwise the bytes are appended to the current slot.
 *
 * Returns 0 normally. Returns non-zero, which makes http_parser stop with an
 * error, when the request carries more headers than msg.headers can hold;
 * without this check the next slot would lie beyond the end of the array.
 */
int on_header_field(http_parser *parser, const char *at, size_t length)
{
    client_t *client = (client_t *)parser->data;
    message *msg = &client->msg;
    const int capacity = (int)(sizeof(msg->headers) / sizeof(msg->headers[0]));

    if (msg->last_header_element != FIELD)
    {
        if (msg->header_num >= capacity)
        {
            return 1;
        }
        ++msg->header_num;
    }

    strlncat(msg->headers[msg->header_num - 1].field,
             sizeof(msg->headers[0].field), at, length);
    msg->last_header_element = FIELD;
    return 0;
}

/* http_parser callback delivering (a piece of) a header value; the bytes are
 * appended to the value of the header slot most recently opened by
 * on_header_field(). Returns 0. */
int on_header_value(http_parser *parser, const char *at, size_t length)
{
    message *msg = &((client_t *)parser->data)->msg;
    header *current = &msg->headers[msg->header_num - 1];

    strlncat(current->value, sizeof(current->value), at, length);
    msg->last_header_element = VALUE;
    return 0;
}

/* http_parser callback delivering (a piece of) the message body; the data is
 * ignored. Returns 0. */
int on_body(http_parser *parser, const char *at, size_t length)
{
    (void)parser;
    (void)at;
    (void)length;
    return 0;
}

/* http_parser callback for the header of a chunk; does nothing and
 * returns 0. */
int on_chunk_header(http_parser *parser)
{
    (void)parser;
    return 0;
}

/* http_parser callback for the end of a chunk; does nothing and returns 0. */
int on_chunk_complete(http_parser *parser)
{
    (void)parser;
    return 0;
}

Makefile

# Builds every *.cpp file of this directory into one executable (no -O flag).
CC=g++
SRC = $(wildcard *.cpp)
OBJS = $(patsubst %.cpp, %.o, $(SRC))
# One dependency file per object; the name matches the -MF option below.
DEPS = $(patsubst %.o, .%.o.d, $(OBJS))
FLAG = -g -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
TARGET = a.out

$(TARGET):$(OBJS)
	$(CC) -o $@ $^ $(FLAG)

%.o:%.cpp
	$(CC) -o $@ -c $(FLAG) $< -MD -MF .$@.d

# Read the generated header dependencies so that editing a header triggers a
# rebuild; the leading '-' ignores files that do not exist yet.
-include $(DEPS)

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -rf $(TARGET) $(OBJS) $(DEPS)

压测过程

不做任何优化

直接通过上面的Makefile编译程序，并放在虚拟机上运行，虚拟机只分配了一个核，压测工具使用ab，同时跑在同一台虚拟机上。

结果

项	指标
QPS	7015
P99	3ms

下面是使用ab跑出来的结果：

[root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 127.0.0.1 (be patient)
Completed 10000 requests
Completed 20000 requests
Completed 30000 requests
Completed 40000 requests
Completed 50000 requests
Completed 60000 requests
Completed 70000 requests
Completed 80000 requests
Completed 90000 requests
Completed 100000 requests
Finished 100000 requests


Server Software:
Server Hostname:        127.0.0.1
Server Port:            7070

Document Path:          /
Document Length:        12 bytes

Concurrency Level:      10
Time taken for tests:   14.254 seconds
Complete requests:      100000
Failed requests:        0
Write errors:           0
Total transferred:      7700000 bytes
HTML transferred:       1200000 bytes
Requests per second:    7015.45 [#/sec] (mean)
Time per request:       1.425 [ms] (mean)
Time per request:       0.143 [ms] (mean, across all concurrent requests)
Transfer rate:          527.53 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    1   0.3      1      12
Processing:     0    1   0.3      1      15
Waiting:        0    0   0.2      0      12
Total:          1    1   0.5      1      16

Percentage of the requests served within a certain time (ms)
  50%      1
  66%      1
  75%      1
  80%      2
  90%      2
  95%      2
  98%      3
  99%      3
 100%     16 (longest request)

加入编译优化O1

Makefile

# Builds every *.cpp file of this directory into one executable with -O1.
CC=g++
SRC = $(wildcard *.cpp)
OBJS = $(patsubst %.cpp, %.o, $(SRC))
# One dependency file per object; the name matches the -MF option below.
DEPS = $(patsubst %.o, .%.o.d, $(OBJS))
FLAG = -g -O1 -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
TARGET = a.out

$(TARGET):$(OBJS)
	$(CC) -o $@ $^ $(FLAG)

%.o:%.cpp
	$(CC) -o $@ -c $(FLAG) $< -MD -MF .$@.d

# Read the generated header dependencies so that editing a header triggers a
# rebuild; the leading '-' ignores files that do not exist yet.
-include $(DEPS)

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -rf $(TARGET) $(OBJS) $(DEPS)

结果

项	指标
QPS	6845
P99	3ms

下面是使用ab压测出来的详细结果：

[root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 127.0.0.1 (be patient)
Completed 10000 requests
Completed 20000 requests
Completed 30000 requests
Completed 40000 requests
Completed 50000 requests
Completed 60000 requests
Completed 70000 requests
Completed 80000 requests
Completed 90000 requests
Completed 100000 requests
Finished 100000 requests


Server Software:
Server Hostname:        127.0.0.1
Server Port:            7070

Document Path:          /
Document Length:        12 bytes

Concurrency Level:      10
Time taken for tests:   14.609 seconds
Complete requests:      100000
Failed requests:        0
Write errors:           0
Total transferred:      7700000 bytes
HTML transferred:       1200000 bytes
Requests per second:    6845.00 [#/sec] (mean)
Time per request:       1.461 [ms] (mean)
Time per request:       0.146 [ms] (mean, across all concurrent requests)
Transfer rate:          514.71 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    1   0.3      1      11
Processing:     0    1   0.3      1      11
Waiting:        0    0   0.2      0       8
Total:          1    1   0.5      1      13

Percentage of the requests served within a certain time (ms)
  50%      1
  66%      1
  75%      2
  80%      2
  90%      2
  95%      2
  98%      3
  99%      3
 100%     13 (longest request)

编译优化O2

Makefile

# Builds every *.cpp file of this directory into one executable with -O2.
CC=g++
SRC = $(wildcard *.cpp)
OBJS = $(patsubst %.cpp, %.o, $(SRC))
# One dependency file per object; the name matches the -MF option below.
DEPS = $(patsubst %.o, .%.o.d, $(OBJS))
FLAG = -g -O2 -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
TARGET = a.out

$(TARGET):$(OBJS)
	$(CC) -o $@ $^ $(FLAG)

%.o:%.cpp
	$(CC) -o $@ -c $(FLAG) $< -MD -MF .$@.d

# Read the generated header dependencies so that editing a header triggers a
# rebuild; the leading '-' ignores files that do not exist yet.
-include $(DEPS)

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -rf $(TARGET) $(OBJS) $(DEPS)

结果

项	指标
QPS	7618
P99	2ms

进行了O2优化，程序的性能并没有得到很大的提升

下面是使用ab压测工具输出的结果

[root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 127.0.0.1 (be patient)
Completed 10000 requests
Completed 20000 requests
Completed 30000 requests
Completed 40000 requests
Completed 50000 requests
Completed 60000 requests
Completed 70000 requests
Completed 80000 requests
Completed 90000 requests
Completed 100000 requests
Finished 100000 requests


Server Software:
Server Hostname:        127.0.0.1
Server Port:            7070

Document Path:          /
Document Length:        12 bytes

Concurrency Level:      10
Time taken for tests:   13.127 seconds
Complete requests:      100000
Failed requests:        0
Write errors:           0
Total transferred:      7700000 bytes
HTML transferred:       1200000 bytes
Requests per second:    7618.07 [#/sec] (mean)
Time per request:       1.313 [ms] (mean)
Time per request:       0.131 [ms] (mean, across all concurrent requests)
Transfer rate:          572.84 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    1   0.2      1      16
Processing:     0    1   0.3      1      16
Waiting:        0    0   0.2      0      16
Total:          1    1   0.3      1      17

Percentage of the requests served within a certain time (ms)
  50%      1
  66%      1
  75%      1
  80%      1
  90%      2
  95%      2
  98%      2
  99%      2
 100%     17 (longest request)

编译优化O3

Makefile

# Builds every *.cpp file of this directory into one executable with -O3
# (this section measures the O3 build).
CC=g++
SRC = $(wildcard *.cpp)
OBJS = $(patsubst %.cpp, %.o, $(SRC))
# One dependency file per object; the name matches the -MF option below.
DEPS = $(patsubst %.o, .%.o.d, $(OBJS))
FLAG = -g -O3 -Werror -I. -I/root/libuv/include -pthread -luv -lhttp_parser
TARGET = a.out

$(TARGET):$(OBJS)
	$(CC) -o $@ $^ $(FLAG)

%.o:%.cpp
	$(CC) -o $@ -c $(FLAG) $< -MD -MF .$@.d

# Read the generated header dependencies so that editing a header triggers a
# rebuild; the leading '-' ignores files that do not exist yet.
-include $(DEPS)

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -rf $(TARGET) $(OBJS) $(DEPS)

结果

项	指标
QPS	7498
P99	2ms

进行了O3优化，程序的性能并没有得到很大的提升

下面是使用ab压测工具输出的报告：

[root@localhost code]# ab -n100000 -c 10 http://127.0.0.1:7070/
This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 127.0.0.1 (be patient)
Completed 10000 requests
Completed 20000 requests
Completed 30000 requests
Completed 40000 requests
Completed 50000 requests
Completed 60000 requests
Completed 70000 requests
Completed 80000 requests
Completed 90000 requests
Completed 100000 requests
Finished 100000 requests


Server Software:
Server Hostname:        127.0.0.1
Server Port:            7070

Document Path:          /
Document Length:        12 bytes

Concurrency Level:      10
Time taken for tests:   13.336 seconds
Complete requests:      100000
Failed requests:        0
Write errors:           0
Total transferred:      7700000 bytes
HTML transferred:       1200000 bytes
Requests per second:    7498.41 [#/sec] (mean)
Time per request:       1.334 [ms] (mean)
Time per request:       0.133 [ms] (mean, across all concurrent requests)
Transfer rate:          563.84 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    1   0.3      1      11
Processing:     0    1   0.3      1      12
Waiting:        0    0   0.2      0      11
Total:          1    1   0.4      1      13

Percentage of the requests served within a certain time (ms)
  50%      1
  66%      1
  75%      1
  80%      1
  90%      2
  95%      2
  98%      2
  99%      2
 100%     13 (longest request)

编译优化O2性能分析

perf获取采样数据

项	指标
上下文切换	8658
缺页中断	177733

下面是perf输出的详细结果：

[root@localhost uv_server]# perf stat ./a.out
listening on 0.0.0.0:7070
^C./a.out: Interrupt

 Performance counter stats for './a.out':

         17,153.28 msec task-clock                #    0.680 CPUs utilized      
             8,658      context-switches          #    0.505 K/sec              
                 5      cpu-migrations            #    0.000 K/sec              
           177,733      page-faults               #    0.010 M/sec              
   <not supported>      cycles                                                  
   <not supported>      instructions                                            
   <not supported>      branches                                                
   <not supported>      branch-misses                                           

      25.230652210 seconds time elapsed

       0.363597000 seconds user
      16.794232000 seconds sys

启动虚拟机多核

由于上面的压测都是使用了单核，而ab和http服务器必然会不断地抢夺cpu资源

因此，决定把虚拟机的CPU核数调成两个，并再进行O2(编译优化的版本)进行压测

结果

项	指标
QPS	9670
P99	2ms

使用了多核，http服务器的性能得到了一次飞跃

下面是使用ab压测工具输出的报告：

[root@localhost ~]# ab -n100000 -c 10 http://127.0.0.1:7070/
This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 127.0.0.1 (be patient)
Completed 10000 requests
Completed 20000 requests
Completed 30000 requests
Completed 40000 requests
Completed 50000 requests
Completed 60000 requests
Completed 70000 requests
Completed 80000 requests
Completed 90000 requests
Completed 100000 requests
Finished 100000 requests


Server Software:
Server Hostname:        127.0.0.1
Server Port:            7070

Document Path:          /
Document Length:        12 bytes

Concurrency Level:      10
Time taken for tests:   10.341 seconds
Complete requests:      100000
Failed requests:        0
Write errors:           0
Total transferred:      7700000 bytes
HTML transferred:       1200000 bytes
Requests per second:    9670.08 [#/sec] (mean)
Time per request:       1.034 [ms] (mean)
Time per request:       0.103 [ms] (mean, across all concurrent requests)
Transfer rate:          727.14 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    0   0.2      0       3
Processing:     0    1   0.2      1       3
Waiting:        0    0   0.2      0       3
Total:          0    1   0.2      1       4

Percentage of the requests served within a certain time (ms)
  50%      1
  66%      1
  75%      1
  80%      1
  90%      1
  95%      1
  98%      2
  99%      2
 100%      4 (longest request)

启用tcmalloc

使用perf生成的报告可以看到，使用了O2优化后的程序，缺页次数高达177,733

为了减少缺页中断，使用开源的内存管理lib tcmalloc来做程序的进一步优化。

结果

把tcmalloc加入了程序之后，性能得到了另一次的飞跃

项	指标
QPS	11679
P99	2ms

下面是使用ab压测工具输出的结果：

This is ApacheBench, Version 2.3 <$Revision: 1430300 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 127.0.0.1 (be patient)


Server Software:
Server Hostname:        127.0.0.1
Server Port:            7070

Document Path:          /
Document Length:        12 bytes

Concurrency Level:      10
Time taken for tests:   8.562 seconds
Complete requests:      100000
Failed requests:        0
Write errors:           0
Total transferred:      7700000 bytes
HTML transferred:       1200000 bytes
Requests per second:    11679.12 [#/sec] (mean)
Time per request:       0.856 [ms] (mean)
Time per request:       0.086 [ms] (mean, across all concurrent requests)
Transfer rate:          878.22 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    0   0.2      0       4
Processing:     0    0   0.2      0       6
Waiting:        0    0   0.2      0       6
Total:          0    1   0.2      1       8

Percentage of the requests served within a certain time (ms)
  50%      1
  66%      1
  75%      1
  80%      1
  90%      1
  95%      1
  98%      2
  99%      2
 100%      8 (longest request)

查看缺页次数

项	指标
缺页中断	5016

可以看到缺页次数大大的降低

下面是perf输出的报告：

[root@localhost uv_server]# perf stat ./a.out
listening on 0.0.0.0:7070
^C./a.out: Interrupt

 Performance counter stats for './a.out':

         15,851.09 msec task-clock                #    0.503 CPUs utilized      
             9,408      context-switches          #    0.594 K/sec              
                15      cpu-migrations            #    0.001 K/sec              
             5,016      page-faults               #    0.316 K/sec              
   <not supported>      cycles                                                  
   <not supported>      instructions                                            
   <not supported>      branches                                                
   <not supported>      branch-misses                                           

      31.499659541 seconds time elapsed

       0.948398000 seconds user
      14.871534000 seconds sys

火焰图

perf record -g -F 99 ./a.out

perf script -i perf.data >out.perf
~/FlameGraph/stackcollapse-perf.pl out.perf > out.folded
~/FlameGraph/flamegraph.pl out.folded > cp.svg

libuv-perf-o2-pic

结论

编译优化，并不能很好地提升程序的性能，我猜测是I7 cpu的功能强大，有强大的推测执行和乱序执行能力，对编译优化的依赖越来越低导致的。而这不能说编译优化不重要，需要看服务器是在什么体系结构下运行。
压测客户端和服务器应该隔离运行，在同时运行在一个核时，它们会不断的抢资源，但是由于本人资源有限，所以只能在虚拟机里玩玩，但也需要配置多核来运行。
服务器离不开内存分配和释放，如果裸用系统的内存分配，会给性能带来一定的下降，这时可以通过使用优秀的内存管理库来优化代码，这无论在什么时候，都是适用的
程序运行在虚拟机上，代表一次系统调用会发生四次上下文切换，但由于本人懒惰，所以没有继续在原生的机器上再压测服务器，有兴趣的读者，可以试试在裸机上跑，我敢肯定，性能会有不小的飞跃
从上面的结果看，有一点我是持怀疑态度的，就是延迟基本是毫秒级的，这跟我对网络性能的认知有很大的差距，网络的性能应该是微秒级别的，我现在只能猜测是虚拟机或者网卡的问题。有兴趣的读者可以再进一步论证。
http服务器是使用TCP短连接的，但一般的echo服务器都是长连接的，这可以省去大量的握手和挥手的rtt时间，如果把程序改成长连接，那么性能也会有很大的飞跃。但同时也因为本人懒惰，所以先写到这里。

libuv http echo服务器性能测试
大家对于echo服务器非常熟悉，但是每当被人问起我，echo服务器的性能是多少，我总是答不上来，要是搭上了，那也是...
libuv echo server性能压测
Server代码 Makefile 客户端 rust_echo_bench 运行网卡配置分析客户端请求qps...
性能测试 | jmeter(上)_基本功能概述
性能测试工具_Jmeter 工具的特点：可以对HTTP和FTP服务器等进行压力和性能测试，也可以对任何数据库进行...
Netty EventLoop与IO模型整理
netty示例 maven依赖 echo服务器示例 echo服务器测试 IO模型 BIO模型伪异步IO NIO模...
HTTP服务器状态码解析，秒杀一切面试问题
HTTP服务器状态代码解析在做接口测试、性能测试的时候,会产生比较多的HTTP错误查看其错误,有超时的,链接不到...
http_load使用详解
1.什么是http_load http_load是一款基于Linux平台的web服务器性能测试工具，用于测试web...
性能测试概览Ⅳ
五类性能测试用例 ●预期指标的性能测试 ●并发用户的性能测试 ●疲劳强度和大数据量的性能测试 ●服务器性能测试 ●...
性能测试常用的测试方法
性能测试可分为七大类如下：第一、后端测试后端性能测试，也就是服务器端性能测试。后端性能测试，是通过性能测试...
压测难，难于上青天，80%的直播应用都败在了这里
目前腾讯WeTest服务器性能测试已经正式对外开放，点击链接：http://wetest.qq.com/gaps/...
网站监控—http_load
1.什么是http_loadhttp_load是一款基于Linux平台的web服务器性能测试工具，用于测试web服...