美文网首页
重组标题

重组标题

作者: 月夜星空下 | 来源:发表于2020-01-17 17:27 被阅读0次
    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    import codecs
    import os, pprint
    import os
    import random, readJSON
    import os, re
    import jieba
    import re
    # Source titles, one per line.
    path = '/Users/lilong/Desktop/1.txt'
    # Fragment pools; the main loop appends one random entry from each pool
    # to every cleaned title.
    path_cp1 = '/Users/lilong/Desktop/cp1.txt'
    path_cp2 = '/Users/lilong/Desktop/cp2.txt'

    # Read each input completely and close it immediately: only the in-memory
    # line lists (line terminators still attached) are used afterwards, so
    # there is no reason to keep the handles open for the rest of the run.
    with open(path, encoding='utf-8') as f:
        pool_op_cp = list(f)
    with open(path_cp1, encoding='utf-8') as op_cp1:
        pool_op_cp1 = list(op_cp1)
    with open(path_cp2, encoding='utf-8') as op_cp2:
        pool_op_cp2 = list(op_cp2)
    # content = ''
    def stopwordslist(path='/Users/lilong/Desktop/stop_words'):
        """Load the stop-word list from a text file.

        Args:
            path: UTF-8 text file holding one stop word per line. Defaults to
                the location this script has always read from, so existing
                zero-argument calls keep working.

        Returns:
            A list containing every line of the file with surrounding
            whitespace stripped, in file order. Blank lines yield empty
            strings.
        """
        # The context manager guarantees the handle is closed even if decoding
        # fails part-way through the file.
        with open(path, encoding='UTF-8') as handle:
            return [line.strip() for line in handle]
    
    def _recombine_title(line, stopwords, pool1, pool2):
        """Build one recombined title from a single source line.

        Args:
            line: Raw source title; anything outside the CJK range
                U+4E00..U+9FA5 is discarded.
            stopwords: Container of words to drop after segmentation.
            pool1: Non-empty sequence of fragments; one is appended at random.
            pool2: Non-empty sequence of fragments; one is appended at random.

        Returns:
            The filtered title followed by one fragment from each pool, with
            all newline characters removed.

        Raises:
            IndexError: If either pool is empty.
        """
        # Keep only the Chinese runs, then segment them into words.
        chinese_only = ''.join(re.findall(u"[\u4e00-\u9fa5]+", line))
        kept = [word for word in jieba.lcut(chinese_only) if word not in stopwords]
        # The segmented text is pure CJK, so it cannot contain spaces or tabs;
        # only the pool entries still carry a line terminator to strip.
        title = ''.join(kept) + random.choice(pool1) + random.choice(pool2)
        return title.replace("\n", "")

    # Loop-invariant: read the stop words once instead of once per title, and
    # use a set so each membership test is O(1).
    stopwords = set(stopwordslist())

    titles = []
    for line in pool_op_cp:
        title = _recombine_title(line, stopwords, pool_op_cp1, pool_op_cp2)
        print(title)
        titles.append(title)

    # Persist every generated title, one per line. Writing the collected list
    # (rather than a single loop variable) also makes empty input produce an
    # empty file instead of an error.
    txt = os.path.join('/Users/lilong/Desktop/z.txt')
    with open(txt, 'w', encoding='utf-8') as out:
        out.writelines(title + "\n" for title in titles)
    

    相关文章

      网友评论

          本文标题:重组标题

          本文链接:https://www.haomeiwen.com/subject/npfhzctx.html