new_plan

作者: 月夜星空下 | 来源:发表于2020-07-16 11:05 被阅读0次
    #!/usr/bin/python
    # coding=utf-8
    # -*- coding: UTF-8 -*-
    import re
    import os
    import json
    import time
    import jieba
    import socket
    import os, re
    import random
    import codecs
    import pymysql
    import os, pprint
    import mysql.connector
    from flask import Flask, jsonify, request, abort
    # from PIL import Image
    from selenium import webdriver
    import selenium.webdriver.support.ui as ui
    from selenium.webdriver import ActionChains
    from apscheduler.schedulers.blocking import BlockingScheduler
    from selenium.webdriver.support.ui import Select
    from plan2 import The_original_title, all_one_middle
    from selenium.webdriver.common.action_chains import ActionChains
    
    # Flask application object; the route decorator below registers its handler on it.
    app = Flask(__name__)
    @app.route('/lksServer',methods=['POST'])
    def hello_world():
        """Handle a POST to /lksServer.

        The JSON request body selects one of two modes:

        * a list: every item is written as one line to Keywords_provided.txt,
          replacing the file's previous contents;
        * otherwise the body is indexed like a dict with the keys ``Url``,
          ``Column``, ``Account``, ``PassWord``, ``Hour`` and ``Minute``.
          Titles are generated from the stored keywords, article bodies are
          fetched from MySQL, and every article is submitted through the
          site's admin form with Selenium.

        Returns:
            The literal string ``'ok'`` in both modes.

        Raises:
            KeyError: a required key is missing from the dict payload.
            IndexError: a corpus/url file is empty while keywords exist.
        """
        keywords_path = r'C:\Users\Administrator\Desktop\likeshuo\Keywords_provided.txt'
        data = json.loads(request.get_data())

        if isinstance(data, list):
            print("传过来的数量", len(data))
            with open(keywords_path, 'w') as f:
                for keyword in data:
                    f.write(keyword + '\n')
            return 'ok'

        print("dict")
        site_url = data['Url']
        column = data['Column']
        account = data['Account']
        password = data['PassWord']
        hour = data['Hour']
        minute = data['Minute']
        # NOTE(review): this prints the account password to stdout.
        print(site_url, column, account, password, hour, minute)

        # Image URLs, title suffixes (question / request) and title endings
        # (value statement). Files are closed as soon as they are read.
        image_pool = _read_lines(r"C:\Users\Administrator\Desktop\likeshuo\url.txt")
        suffix_pool = _read_lines(r"C:\Users\Administrator\Desktop\likeshuo\corpus_one.txt")
        ending_pool = _read_lines(r"C:\Users\Administrator\Desktop\likeshuo\corpus_two.txt")

        # Keywords saved by an earlier list-mode request (default encoding,
        # matching how they were written).
        with open(keywords_path, "r") as f:
            keyword_lines = f.readlines()
        print(keyword_lines)

        # Load the stop words once, as a set, instead of re-reading the file
        # for every keyword; only needed when there is something to process.
        stopwords = set()
        if keyword_lines:
            stopwords = {
                line.strip()
                for line in _read_lines(r"C:\Users\Administrator\Desktop\likeshuo\stop_words")
            }

        titles, images = _build_titles_and_images(
            keyword_lines, suffix_pool, ending_pool, image_pool, stopwords)

        articles = _fetch_article_bodies(titles, images)
        print("ok")

        print("系统:", len(titles))
        _publish_articles(site_url, column, account, password, articles)
        return 'ok'


    def _read_lines(path):
        """Return the lines of the UTF-8 text file at *path*, newlines kept."""
        with open(path, encoding='utf-8') as f:
            return list(f)


    def _build_titles_and_images(keyword_lines, suffixes, endings, image_urls, stopwords):
        """Return ``(titles, images)`` with one entry each per keyword line."""
        titles = []
        images = []
        for line in keyword_lines:
            # Keep only CJK ideographs, so tokens never contain whitespace.
            chinese_only = ''.join(re.findall(u"[\u4e00-\u9fa5]+", line))
            kept = [tok for tok in jieba.lcut(chinese_only) if tok not in stopwords]
            title = ''.join(kept) + random.choice(suffixes) + random.choice(endings)
            titles.append(title.replace("\n", ""))
            # The image entry keeps its trailing newline, as read from the file.
            images.append(random.choice(image_urls))
        return titles, images


    def _fetch_article_bodies(titles, image_urls):
        """Return ``(title, html_body)`` pairs for titles that have DB content."""
        from dtl_nlp import max_length_words

        keywords = [max_length_words(title) for title in titles]

        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration outside the source file.
        config = {'host': '127.0.0.1',
                  'user': 'root',
                  'password': 'yz1028959',
                  'port': 3306,
                  'database': 'data_likeshuo',
                  'charset': 'utf8'
                  }
        query = 'select t1.content from children_title t LEFT JOIN children_english t1 on t.ceid=t1.id where t.ckey=%s ORDER BY RAND() LIMIT %s'
        whitespace = re.compile(r'\s+')
        articles = []

        cnn = mysql.connector.connect(**config)
        try:
            cursor = cnn.cursor(buffered=True)
            try:
                for keyword, title in zip(keywords, titles):
                    cursor.execute(query, (keyword, random.randint(3, 5)))
                    rows = [row for row in cursor.fetchall() if row != (None,)]
                    paragraphs = [whitespace.sub('', row[0]) for row in rows]

                    # Paragraphs are paired positionally with the image list;
                    # the image of the last pair heads the article.
                    # NOTE(review): this caps the paragraph count at the number
                    # of titles and picks the image by paragraph index, not by
                    # title index -- confirm that is intended.
                    paired = list(zip(paragraphs, image_urls))
                    if not paired:
                        # Previously this raised NameError (first title) or
                        # silently reused the previous article's body.
                        print("no content found, skipping title:", title)
                        continue

                    body = ''.join("<p>" + text + "</p>" for text, _ in paired)
                    image_tag = "<p style={center}><img src={url}  alt={rt}></p>".format(
                        center="text-align:center", url=paired[-1][1], rt=title)
                    articles.append((title, image_tag + body))
            finally:
                cursor.close()
        finally:
            cnn.close()
        return articles


    def _publish_articles(url, column, account, password, articles):
        """Log in to the admin site at *url* and submit every article."""
        driver_path = r"C:\Users\Administrator\Desktop\likeshuo\chromedriver"
        driver = webdriver.Chrome(executable_path=driver_path)
        try:
            driver.get(url)
            driver.find_element_by_name("userid").send_keys(account)  # user name
            driver.find_element_by_name("pwd").send_keys(password)  # password
            driver.find_element_by_name("sm1").click()  # log in
            time.sleep(1)

            for title, content in articles:
                # Open the content-maintenance menu entry.
                driver.find_element_by_xpath('/html/body/div[3]/div[1]/div[2]/ul/li[4]/a').click()
                time.sleep(2)
                driver.switch_to.frame("main")
                add_button = driver.find_element_by_xpath(
                    '/html/body/table/tbody/tr/td/table/tbody/tr/td/input[1]')  # add document
                ActionChains(driver).move_to_element(add_button).click(add_button).perform()
                time.sleep(1)
                driver.find_element_by_xpath('//*[@id="title"]').send_keys(title)
                Select(driver.find_element_by_xpath('//*[@id="typeid"]')).select_by_value(column)
                driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
                time.sleep(1)
                driver.find_element_by_xpath('//*[@id="cke_8"]').click()  # switch editor to source mode
                driver.find_element_by_xpath('//*[@id="cke_contents_body"]/textarea').send_keys(content)
                driver.find_element_by_xpath('/html/body/form/table[6]/tbody/tr/td[2]/input').click()  # submit
                driver.switch_to.parent_frame()
        finally:
            # Always release the browser, even when a step above fails.
            driver.quit()
    
    # When executed as a script, start Flask's built-in server listening on
    # all interfaces (0.0.0.0), port 8890.
    if __name__ == '__main__':
        app.run(host='0.0.0.0', port=8890)
    

    相关文章

      网友评论

          本文标题:new_plan

          本文链接:https://www.haomeiwen.com/subject/pnkfhktx.html