
On this special day of 520, here are a few Python snippets you can actually use

Author: python分享者 | Published 2018-05-20 21:09

    A Python 520 confession tool, for the one you love

    from turtle import *
    
    def go_to(x, y):
       up()
       goto(x, y)
       down()
    
    
    def big_Circle(size):  # draw the large arc of the heart
       speed(1)
       for i in range(150):
           forward(size)
           right(0.3)
    
    def small_Circle(size):  # draw the small arc of the heart
       speed(1)
       for i in range(210):
           forward(size)
           right(0.786)
    
    def line(size):
       speed(1)
       forward(51*size)
    
    def heart( x, y, size):
       go_to(x, y)
       left(150)
       begin_fill()
       line(size)
       big_Circle(size)
       small_Circle(size)
       left(120)
       small_Circle(size)
       big_Circle(size)
       line(size)
       end_fill()
    
    def arrow():
       pensize(10)
       setheading(0)
       go_to(-400, 0)
       left(15)
       forward(150)
       go_to(339, 178)
       forward(150)
    
    def arrowHead():
       pensize(1)
       speed(1)
       color('red', 'red')
       begin_fill()
       left(120)
       forward(20)
       right(150)
       forward(35)
       right(120)
       forward(35)
       right(150)
       forward(20)
       end_fill()
    
    
    def main():
       pensize(2)
       color('red', 'pink')
       # getscreen().tracer(30, 0)  # uncomment to render the figure almost instantly
       heart(200, 0, 1)          # draw the first heart: the first two arguments set its position, the last one its size
       setheading(0)             # point the pen along the positive x-axis
       heart(-80, -100, 1.5)     # draw the second heart
       arrow()                   # draw the arrow shaft through both hearts
       arrowHead()               # draw the arrowhead
       go_to(400, -300)
       write("author:520Python", move=True, align="left", font=("宋体", 30, "normal"))
       done()
    
    main()
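
    An optional extra that is not part of the original script: the finished drawing can be saved to disk by exporting the underlying Tk canvas to PostScript. A minimal sketch, assuming the two lines below are placed inside main() right before done(); heart.eps is only an example file name:

    from turtle import getcanvas

    getcanvas().postscript(file="heart.eps")   # export the current canvas to a PostScript file (call before done())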
    

    And if the confession fails! Then...........

    Scraping girl-photo galleries with Python

    Something to soothe your tender little heart. After all, today everyone is busy showing off, so bury yourself in the "sacred books" and pay no attention to the world outside the window.

    #!/usr/bin/env python
    # coding=utf-8
    import os
    import time
    import threading
    from multiprocessing import Pool, cpu_count
    
    import requests
    from bs4 import BeautifulSoup
    
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/56.0.2924.87 Safari/537.36',
        'Referer': "http://www.mmjpg.com"
    }
    dir_path = r"E:\mmjpg"      # 下载图片保存路径
    def save_pic(pic_src, pic_cnt):
        """ 将图片下载到本地文件夹 """
        try:
            img = requests.get(pic_src, headers=headers, timeout=10)
            imgname = "pic_cnt_{}.jpg".format(pic_cnt + 1)
            with open(imgname, 'ab') as f:
                f.write(img.content)
                print(imgname)
        except Exception as e:
            print(e)
    def make_dir(folder_name):
        """ Create a folder for one gallery and switch into it """
        path = os.path.join(dir_path, folder_name)
        # If the folder already exists the gallery has been crawled before, so skip it (dedup). Returns False if it exists, True otherwise.
        if not os.path.exists(path):
            os.makedirs(path)
            print(path)
            os.chdir(path)
            return True
        print("Folder has existed!")
        return False
    def delete_empty_dir(dir):
        """ If the program is interrupted midway, a gallery folder may already exist without any downloaded images;
        since existing folders are skipped, such empty folders have to be removed first """
        if os.path.exists(dir):
            if os.path.isdir(dir):
                for d in os.listdir(dir):
                    path = os.path.join(dir, d)     # path of the child entry
                    if os.path.isdir(path):
                        delete_empty_dir(path)      # recurse into subfolders
            if not os.listdir(dir):
                os.rmdir(dir)
                print("remove the empty dir: {}".format(dir))
        else:
            print("Please start your performance!") # 请开始你的表演
    
    lock = threading.Lock()     # global lock guarding folder creation (per-process; each Pool worker gets its own copy)
    def urls_crawler(url):
        """ Crawler entry point: scrape one gallery URL """
        try:
            r = requests.get(url, headers=headers, timeout=10).text
            # gallery title, also used as the folder name (re-encode/decode to undo requests' default ISO-8859-1 guess)
            folder_name = BeautifulSoup(r, 'lxml').find('h2').text.encode('ISO-8859-1').decode('utf-8')
            with lock:
                if make_dir(folder_name):
                    # number of images in the gallery
                    max_count = BeautifulSoup(r, 'lxml').find('div', class_='page').find_all('a')[-2].get_text()
                    # URLs of the per-image pages
                    page_urls = [url + "/" + str(i) for i in range(1, int(max_count) + 1)]
                    # direct image URLs
                    img_urls = []
                    for index, page_url in enumerate(page_urls):
                        result = requests.get(page_url, headers=headers, timeout=10).text
                        # the last page holds a bare <img> without an <a> wrapper, so it is parsed separately
                        if index + 1 < len(page_urls):
                            img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('a').img['src']
                            img_urls.append(img_url)
                        else:
                            img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('img')['src']
                            img_urls.append(img_url)
    
                    for cnt, pic_url in enumerate(img_urls):
                        save_pic(pic_url, cnt)
        except Exception as e:
            print(e)
    if __name__ == "__main__":
        urls = ['http://mmjpg.com/mm/{cnt}'.format(cnt=cnt) for cnt in range(1, 953)]
        pool = Pool(processes=cpu_count())
        try:
            delete_empty_dir(dir_path)
            pool.map(urls_crawler, urls)
        except Exception as e:
            time.sleep(30)
            delete_empty_dir(dir_path)
            pool.map(urls_crawler, urls)
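
    One caveat in the script above: lock = threading.Lock() only synchronizes threads inside a single process, while the crawling is spread across a multiprocessing.Pool, so every worker process ends up with its own copy of the lock and the folder check is not truly serialized. Below is a minimal sketch of a cross-process lock, assuming the rest of the script stays unchanged; the init_worker helper is added purely for illustration:

    from multiprocessing import Pool, Manager, cpu_count

    def init_worker(shared_lock):
        """ Illustrative helper: give every worker process the same Manager lock """
        global lock
        lock = shared_lock

    if __name__ == "__main__":
        urls = ['http://mmjpg.com/mm/{cnt}'.format(cnt=cnt) for cnt in range(1, 953)]
        manager = Manager()                      # the manager process owns the real lock
        pool = Pool(processes=cpu_count(),
                    initializer=init_worker,
                    initargs=(manager.Lock(),))  # each worker receives a proxy to it
        pool.map(urls_crawler, urls)
        pool.close()
        pool.join()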
    

    Scraping a web novel with Python

    import urllib.request
    import re
    # 1. fetch the index page source
    # 2. extract the chapter links
    # 3. fetch each chapter page source
    # 4. extract the chapter text
    # 5. download it, i.e. write it to a file

    # camelCase naming convention
    # fetch and save the novel content
    def getNovelContent():
        # urllib.request.urlopen() returns an http.client.HTTPResponse object
        html = urllib.request.urlopen("http://www.quanshuwang.com/book/0/269").read()
        html = html.decode("gbk")
        # the parentheses are capture groups; without them nothing useful is extracted
        # .*? is a non-greedy "match anything" in the regular expression
        reg = r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>'
        # precompile the pattern for efficiency
        reg = re.compile(reg)
        urls = re.findall(reg,html)
        # print(urls)
        # urls is a list of (chapter_url, chapter_title) tuples, e.g.
        # [(http://www.quanshuwang.com/book/0/269/78850.html, 第一章 山边小村),
        #  (http://www.quanshuwang.com/book/0/269/78854.html, 第二章 青牛镇)]
        for url in urls:
            # chapter URL
            novel_url = url[0]
            # chapter title
            novel_title = url[1]
    
            chapt = urllib.request.urlopen(novel_url).read()
            chapt_html = chapt.decode("gbk")
            # r'' marks a raw string, so backslashes need no escaping (r"\d" instead of "\\d")
            reg = r'</script>&nbsp;&nbsp;&nbsp;&nbsp;(.*?)<script type="text/javascript">'
            # re.S (DOTALL) lets . match newlines as well
            reg = re.compile(reg,re.S)
            chapt_content = re.findall(reg,chapt_html)
            # print(chapt_content)
            # 列表["&nbsp;&nbsp;&nbsp;&nbsp二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />"]
    
            # str.replace(old, new): strip the &nbsp; indentation from the text
            chapt_content = chapt_content[0].replace("&nbsp;&nbsp;&nbsp;&nbsp;","")
            # print(chapt_content)    now a plain string: 二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />
            chapt_content = chapt_content.replace("<br />","")
    
            print("正在保存 %s"%novel_title)
            # w 读写模式  wb
            # f = open("{}.txt".format(novel_title),'w')
            # f.write(chapt_content)
    
            with open("{}.txt".format(novel_title),'w') as f:
                f.write(chapt_content)
    
            # f.close()
    
    getNovelContent()
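
    The regular expressions above are tied tightly to the markup of quanshuwang.com. As a rough alternative sketch (not from the original post), the same chapter list could be pulled out with BeautifulSoup, which the image scraper earlier already uses; list_chapters below is a hypothetical helper and assumes the page keeps the <li><a href=... title=...> structure that the regex targets:

    import urllib.request
    from bs4 import BeautifulSoup

    def list_chapters(index_url="http://www.quanshuwang.com/book/0/269"):
        # hypothetical helper: return (chapter_url, chapter_title) pairs
        html = urllib.request.urlopen(index_url).read().decode("gbk")
        soup = BeautifulSoup(html, "lxml")
        # a tighter selector may be needed on the live page to skip unrelated links
        return [(a["href"], a.get_text()) for a in soup.select("li > a[title]")]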
    

    But all of that is only comfort for the soul; what we really need is to invest in ourselves!

    Scraping Zhaopin (智联招聘) job listings with Python

    Land a well-paid job, climb to the peak of life, and win over the rich and the beautiful. All the more reason to learn Python well!


    #-*- coding: utf-8 -*-
    import re
    import csv
    import requests
    from tqdm import tqdm
    from urllib.parse import urlencode
    from requests.exceptions import RequestException
    
    def get_one_page(city, keyword, region, page):
       '''
       Fetch the search-result page and return its HTML
       '''
       paras = {
           'jl': city,         # city to search in
           'kw': keyword,      # search keyword
           'isadv': 0,         # whether to enable the advanced search options
           'isfilter': 1,      # whether to filter the results
           'p': page,          # page number
           're': region        # short for region; 2005 stands for Haidian
       }
    
       headers = {
           'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
           'Host': 'sou.zhaopin.com',
           'Referer': 'https://www.zhaopin.com/',
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
           'Accept-Encoding': 'gzip, deflate, br',
           'Accept-Language': 'zh-CN,zh;q=0.9'
       }
    
       url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?' + urlencode(paras)
       try:
           # fetch the page content and return the HTML
           response = requests.get(url, headers=headers)
           # use the status code to check whether the request succeeded
           if response.status_code == 200:
               return response.text
           return None
       except RequestException as e:
           return None
    
    def parse_one_page(html):
       '''
       Parse the HTML, extract the useful fields, and yield them
       '''
       # parse with a regular expression
       pattern = re.compile('<a style=.*? target="_blank">(.*?)</a>.*?'        # job title
           '<td class="gsmc"><a href="(.*?)" target="_blank">(.*?)</a>.*?'     # company URL and company name
           '<td class="zwyx">(.*?)</td>', re.S)                                # monthly salary
    
       # find every match in the page
       items = re.findall(pattern, html)   
    
       for item in items:
           job_name = item[0]
           job_name = job_name.replace('<b>', '')
           job_name = job_name.replace('</b>', '')
           yield {
               'job': job_name,
               'website': item[1],
               'company': item[2],
               'salary': item[3]
           }
    
    def write_csv_file(path, headers, rows):
       '''
       Write both the header row and the data rows to a csv file (note: not called by main() below)
       '''
       # the encoding keeps Chinese text from raising errors on write
       # newline='' stops csv from adding a blank line after every row
       with open(path, 'a', encoding='gb18030', newline='') as f:
           f_csv = csv.DictWriter(f, headers)
           f_csv.writeheader()
           f_csv.writerows(rows)
    
    def write_csv_headers(path, headers):
       '''
       Write the header row
       '''
       with open(path, 'a', encoding='gb18030', newline='') as f:
           f_csv = csv.DictWriter(f, headers)
           f_csv.writeheader()
    
    def write_csv_rows(path, headers, rows):
       '''
       Write the data rows
       '''
       with open(path, 'a', encoding='gb18030', newline='') as f:
           f_csv = csv.DictWriter(f, headers)
           f_csv.writerows(rows)
    
    def main(city, keyword, region, pages):
       '''
       Main entry point
       '''
       filename = 'zl_' + city + '_' + keyword + '.csv'
       headers = ['job', 'website', 'company', 'salary']
       write_csv_headers(filename, headers)
       for i in tqdm(range(pages)):
           # fetch every job posting on this page and append it to the csv file
           jobs = []
           html = get_one_page(city, keyword, region, i)
           items = parse_one_page(html)
           for item in items:
               jobs.append(item)
           write_csv_rows(filename, headers, jobs)
    
    if __name__ == '__main__':
       main('北京', 'python工程师', 2005, 10)
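
    To sanity-check the result, the generated file can be read back with the standard csv module. A small sketch, assuming the default call above, whose output file is named zl_北京_python工程师.csv:

    import csv

    # print the first few scraped postings as a quick check
    with open('zl_北京_python工程师.csv', encoding='gb18030', newline='') as f:
        for i, row in enumerate(csv.DictReader(f)):
            if i >= 5:
                break
            print(row['job'], row['company'], row['salary'])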
    
