重组标题

作者: 月夜星空下 | 来源:发表于2020-01-17 17:27 被阅读0次

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import codecs
import os, pprint
import os
import random, readJSON
import os, re
import jieba
import re
# Input files: the lines to process, plus two pools of lines from which one
# entry each is appended to every processed line further down the script.
path = '/Users/lilong/Desktop/1.txt'
path_cp1 = '/Users/lilong/Desktop/cp1.txt'
path_cp2 = '/Users/lilong/Desktop/cp2.txt'

# Read each file completely into a list of lines (trailing newlines kept).
# The `with` blocks close every handle as soon as it has been read instead of
# leaving three files open for the lifetime of the process.
with open(path, encoding='utf-8') as f:
    pool_op_cp = list(f)
with open(path_cp1, encoding='utf-8') as op_cp1:
    pool_op_cp1 = list(op_cp1)
with open(path_cp2, encoding='utf-8') as op_cp2:
    pool_op_cp2 = list(op_cp2)
# content = ''
def stopwordslist(stop_words_path='/Users/lilong/Desktop/stop_words'):
    """Load the stop-word list, one entry per line.

    Args:
        stop_words_path: Location of the UTF-8 encoded stop-word file.
            Defaults to the path this script has always read, so a bare
            ``stopwordslist()`` call behaves as before.

    Returns:
        A list containing every line of the file with surrounding
        whitespace stripped, in file order. Blank lines yield ``''``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed; iterating the
    # file object directly avoids building an intermediate readlines() list.
    with open(stop_words_path, encoding='UTF-8') as stop_file:
        return [line.strip() for line in stop_file]

# Build one recombined title per input line: keep only the characters in the
# range U+4E00-U+9FA5, segment them with jieba, drop stop words, then append
# one randomly chosen line from each of the two phrase pools.

# Compiled once; the pattern is the same for every line.
chinese_run = re.compile(u"[\u4e00-\u9fa5]+")
# Loaded once instead of re-reading the stop-word file for every input line;
# a frozenset makes each membership test O(1).
stopwords = frozenset(stopwordslist())

titles = []
for line in pool_op_cp:
    # Concatenate every matched run, discarding all other characters.
    r = ''.join(chinese_run.findall(line))
    a = jieba.lcut(r)
    # `r` contains only characters matched by the pattern above, so the
    # segments can never be a tab or contain spaces; a plain join of the
    # surviving words is all that is needed.
    outstr = ''.join(word for word in a if word not in stopwords)
    # Pick one entry per pool without reshuffling the whole list each time.
    # An empty pool raises IndexError.
    con_cp1 = random.choice(pool_op_cp1)
    con_cp2 = random.choice(pool_op_cp2)
    # The pool entries still carry their trailing newline; strip it so each
    # title is a single line.
    title = (outstr + con_cp1 + con_cp2).replace("\n", "")
    print(title)
    titles.append(title)

# Write every generated title, one per line. Writing only the value left over
# from the final loop iteration would silently drop all earlier titles (and
# fail outright when there are no input lines).
txt = os.path.join('/Users/lilong/Desktop/z.txt')
with open(txt, 'w', encoding='utf-8') as out_file:
    out_file.write("\n".join(titles))

网友评论

本文标题：重组标题

本文链接：https://www.haomeiwen.com/subject/npfhzctx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

重组标题

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读