提取.pcap的头部信息并分类

作者: Aedda | 来源:发表于2019-12-04 16:41 被阅读0次

提取.pcap的头部信息并分类
【Python】header头部文件解析
Python 提取邮件头基本信息
提取.pcap文件中的host
多光谱遥感找矿技术流程
2017.9.11
【实验问题记录】使用tshark从大pcap文件中过滤出想要的数
OpenCV和SVM分类器在自动驾驶中的车辆检测
今日作业
go 爬虫乱码

'''
host = host
name = 应用名别名

'''

from scapy.all import rdpcap  # 读pcap
import re
import sys
from urllib.request import unquote  # url解码


def extract(src):
    """Scan every packet of a pcap file and collect header-like fields.

    Each packet is turned into text with ``repr()`` and searched with regular
    expressions, so the patterns match the *escaped* form of control bytes
    (a literal backslash followed by ``r``, ``n`` or ``x00``), not the raw
    bytes themselves.

    Matches are appended to the module-level lists ``all_host``,
    ``all_server_name``, ``all_UA``, ``all_com_name``, ``all_url`` and
    ``all_cookie``; those lists must already exist when this is called.

    Args:
        src: Path of the capture file; handed to ``rdpcap`` as ``str(src)``.

    Returns:
        None. Results are only available through the module-level lists.
    """
    # Compiled once, outside the per-packet loop.  Raw strings keep the
    # regex escapes (``\d``, ``\.``) from being read as string escapes.
    host_re = re.compile(r'Host: (.*?)\\r\\n')
    server_name_re = re.compile(r'\\x00\\x00\\x\d\d(.*?)\\x00')
    ua_re = re.compile(r'User-Agent: (.*?)\\r\\n')
    name_re = re.compile(r'(com\..*?)\\r')
    url_re = re.compile(r'(?:GET|POST|PUT) (.*?) HTTP')
    cookie_re = re.compile(r'\\r\\nCookie: (.*?)\\r\\n')

    # Substrings that disqualify a server-name / app-name candidate.
    server_name_rejects = ('-', 'Host', '\\', '/')
    name_rejects = ('\\', 'com.cn', 'com.org')

    packets = rdpcap(str(src))
    print(repr(packets))
    for data in packets:
        res = repr(data)

        # Host header values; anything still containing an escape is dropped.
        all_host.extend(
            host for host in host_re.findall(res) if '\\' not in host
        )

        # Server-name candidates: text between a ``\x00\x00\xNN`` prefix and
        # the next ``\x00``, kept only when it contains ``.com``.
        for candidate in server_name_re.findall(res):
            if '.com' not in candidate:
                continue
            if any(token in candidate for token in server_name_rejects):
                continue
            all_server_name.append(candidate)

        # User-Agent header values.
        all_UA.extend(
            agent for agent in ua_re.findall(res) if '\\' not in agent
        )

        # Application alias (``com.xxx`` style identifiers).
        for name in name_re.findall(res):
            # Every token is tested on its own: ``('\\' or 'com.cn' or
            # 'com.org') not in name`` would evaluate the ``or`` chain first
            # and end up checking the backslash only.
            if any(token in name for token in name_rejects):
                continue
            # Keep only the part before the first '&', if there is one.
            all_com_name.append(name.split('&')[0])

        # Request targets of GET / POST / PUT lines; empty targets and a
        # bare "/" (ignoring spaces) are skipped.
        for target in url_re.findall(res):
            if target and target.replace(' ', '') != '/':
                all_url.append(unquote(target))

        # Cookie header values, URL-decoded.
        all_cookie.extend(unquote(value) for value in cookie_re.findall(res))


def all_host_to():
    """Print a ``Host`` banner, then each distinct entry of ``all_host``.

    Reads the module-level ``all_host`` list.  Duplicates are collapsed
    through a ``set``, so the order of the printed lines is not defined.
    """
    stars = '*' * 50
    print(stars + 'Host' + stars, end='\n\n')
    for entry in set(all_host):
        print(entry)


def all_server_name_to():
    """Print a ``server_name`` banner, then each distinct server name.

    Reads the module-level ``all_server_name`` list.  Duplicates are
    collapsed through a ``set``, so the order of the printed lines is not
    defined.
    """
    stars = '*' * 50
    print(stars + 'server_name' + stars, end='\n\n')
    for entry in set(all_server_name):
        print(entry)


def all_UA_to():
    """Print a ``UA`` banner, then each distinct User-Agent string.

    Reads the module-level ``all_UA`` list.  Duplicates are collapsed
    through a ``set``, so the order of the printed lines is not defined.
    """
    stars = '*' * 50
    print(stars + 'UA' + stars, end='\n\n')
    for entry in set(all_UA):
        print(entry)


def all_name_to():
    """Print a ``Name`` banner, then each distinct application alias.

    Reads the module-level ``all_com_name`` list.  Duplicates are collapsed
    through a ``set``, so the order of the printed lines is not defined.
    """
    stars = '*' * 50
    print(stars + 'Name' + stars, end='\n\n')
    for entry in set(all_com_name):
        print(entry)


def all_url_to():
    """Print a ``URL`` banner, then each distinct collected URL.

    Reads the module-level ``all_url`` list.  Duplicates are collapsed
    through a ``set``, so the order of the printed lines is not defined.
    """
    stars = '*' * 50
    print(stars + 'URL' + stars, end='\n\n')
    for entry in set(all_url):
        print(entry)


def all_cookie_to():
    """Print a ``Cookie`` banner, then each distinct cookie string.

    Reads the module-level ``all_cookie`` list.  Duplicates are collapsed
    through a ``set``, so the order of the printed lines is not defined.
    """
    stars = '*' * 50
    print(stars + 'Cookie' + stars, end='\n\n')
    for entry in set(all_cookie):
        print(entry)


def all_ls_to():
    """Print one banner, then every distinct value from all six lists.

    Concatenates the module-level lists ``all_host``, ``all_com_name``,
    ``all_server_name``, ``all_url``, ``all_cookie`` and ``all_UA`` and prints
    each distinct value once.  Duplicates are collapsed through a ``set``, so
    the order of the printed lines is not defined.
    """
    combined = (
        all_host
        + all_com_name
        + all_server_name
        + all_url
        + all_cookie
        + all_UA
    )
    stars = '*' * 50
    print(stars + '全部' + stars, end='\n\n')
    for entry in set(combined):
        print(entry)


def main(src):
    """Extract fields from the capture at ``src`` and print every category.

    Runs ``extract(src)`` first, then prints the collected values one
    category at a time.

    Args:
        src: Path of the pcap file, forwarded unchanged to ``extract``.
    """
    extract(src)

    # Printed in this order: hosts, app names, URLs, cookies, user agents,
    # server names.
    reporters = (
        all_host_to,
        all_name_to,
        all_url_to,
        all_cookie_to,
        all_UA_to,
        all_server_name_to,
    )
    for report in reporters:
        report()

    # all_ls_to() (one merged dump of every list) is left disabled here.


# Accumulators filled by extract() and read by the all_*_to() printers.
# They live at module level so the functions also work when this file is
# imported, not only when it is executed as a script.
all_server_name = []
all_host = []
all_com_name = []
all_url = []
all_cookie = []
all_UA = []


if __name__ == '__main__':
    # Usage: python <this file> [capture.pcap]
    # Without an argument the capture defaults to ./tcp.pcap.
    main(sys.argv[1] if len(sys.argv) > 1 else r'./tcp.pcap')