美文网首页
Python 正则表达式查找

Python 正则表达式查找

作者: 郭青茄 | 来源:发表于2020-05-04 15:51 被阅读0次

    import re

    import csv

    # data = open('tieba01.txt', 'r', encoding='UTF-8').readlines()

    # print(len(data))

    # Read the saved page in one piece so the patterns below can be applied to the full text.
    with open('tieba01.txt', 'r', encoding='UTF-8') as f:
        source = f.read()

    # Rows destined for the CSV export are collected in this list.
    result_list = []

    # re.search only reports the first match found scanning left to right, whereas
    # findall returns every non-overlapping match as a list. Each pattern has one
    # capture group, so each list holds just the captured text.
    # re.S lets '.' match newline characters as well.
    username_pattern = re.compile('username="(.*?)"', re.S)
    content_pattern = re.compile(
        'd_post_content j_d_post_content " style="display:;">(.*?)</div>', re.S)
    # Only "tail-info" spans whose text begins with 2020 are captured.
    reply_time_pattern = re.compile('class="tail-info">(2020.*?)<', re.S)

    username_list = username_pattern.findall(source)
    content_list = content_pattern.findall(source)
    reply_time_list = reply_time_pattern.findall(source)

    def clear_content(src):
        """Return ``src`` with every ``<img class...>`` tag stripped out.

        The match is non-greedy, so each tag ends at the first ``>`` after
        ``<img class``. No flags are passed, so ``.`` does not match a newline
        and a tag that spans several lines is left in place.
        """
        img_tag = re.compile(r'(<img class.*?>)')
        # Substituting the empty string deletes each matched tag.
        return img_tag.sub('', src)

    # Debug preview of the first username and the 13th post body. Guarded so a
    # page with fewer matches does not abort the script with an IndexError.
    if username_list and len(content_list) > 12:
        print(username_list[0], content_list[12])

    # Build one row per post. The three lists come from independent regex scans
    # and may differ in length; zip() pairs them in lockstep and stops at the
    # shortest, instead of indexing past the end of a shorter list.
    for username, content, reply_time in zip(username_list, content_list, reply_time_list):
        result_list.append({
            'username': username,
            'content': clear_content(content),
            'time': reply_time,
        })

    # newline='' leaves line-ending handling to the csv module; 'utf-8-sig'
    # prefixes the file with a UTF-8 byte-order mark.
    with open('tieba01.csv', 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=['username', 'content', 'time'])
        writer.writeheader()
        writer.writerows(result_list)

    # with open('E:/dst.csv', 'wb') as dstfile:  # 写入方式选择wb,否则有空行

    #    writer = csv.DictWriter(dstfile, fieldnames=['username','content','time'])

    #    writer.writeheader()  # 写入表头

    #    writer.writerows(data)  # 批量写入

    相关文章

      网友评论

          本文标题:Python 正则表达式查找

          本文链接:https://www.haomeiwen.com/subject/wwhyghtx.html