元组、字典、集合

tuple

tuple（元组）和 list（列表）很像，只是创建之后不能修改其中的元素（不可变类型）

#
# A tuple can hold values of mixed types (str, int, float, even a list).
a = ("小鲁班", "李元芳", 100, 99.0, [333, 444])
# Show the tuple itself, its type, and its first element, one per line.
print(a, type(a), a[0], sep="\n")

# Tuples are immutable, so assigning to an index raises:
# a[0] = "haha"  # TypeError: 'tuple' object does not support item assignment
# Many functions return their results as a tuple.

# A one-element tuple needs the trailing comma.
b = (100,)
print(type(b))

# Print every element of the tuple in order, one per line.
print(*a, sep="\n")

dict

# dict: maps each key to a value
hero_info = dict(name="鲁班七号", grade=13, skill="无敌鲨鱼炮")
print(hero_info)
print(type(hero_info))
# Read a value by subscripting with its key
print(hero_info["name"])
print(hero_info["skill"])
# Subscripting with a missing key raises:
# print(hero_info["money"])  # KeyError: 'money'
# .get() returns a fallback instead of raising when the key is absent
print(hero_info.get("name", "haha"))  # key present -> its value, not "haha"
print(hero_info.get("money"))  # key missing, no default -> None
print(hero_info.get("money", 200))  # key missing, default given -> 200
# Modify the value stored under an existing key
hero_info.update(grade=14)
print(hero_info)
# Add a new key/value pair
hero_info["money"] = 10000
print(hero_info)
# Remove a single entry by key
# (`del hero_info` without a key would delete the whole variable instead)
hero_info.pop("grade")
print(hero_info)

hero_info = dict(name="鲁班七号", grade=13, skill="无敌鲨鱼炮")
# Number of key/value pairs
print(len(hero_info))
# All keys, collected into a list
print([*hero_info])
# All values, collected into a list
print([*hero_info.values()])
# All (key, value) pairs, collected into a list of tuples
print([*hero_info.items()])

# Walk the dict, printing each key next to its value
for key in hero_info:
    print(key, "---->", hero_info[key])
text = "You Are My Sunshine The other night dear as I lay sleeping I dreamed I held you in my arms When I aw You Are My Sunshine The other night dear as"
# Word-frequency count: build a dict mapping each word to how often it occurs,
# e.g. {"Sunshine": 2, "other": 2, ...}
wl = text.split()
print(wl)
# Seed every distinct word with 0 first, so the loop can increment directly;
# a bare `counts[word] += 1` on an empty dict would raise KeyError.
counts = dict.fromkeys(wl, 0)
for word in wl:
    counts[word] += 1
print(counts)

set

# A set is unordered and keeps only one copy of each value.
a = {112, 3, 4, 5}
print(a, type(a), sep="\n")
# The repeated 666 entries collapse into a single element.
b = {222, 666, 666, 666}
print(b)
# Sets cannot be indexed:
# print(b[0])  # 'set' object does not support indexing

# Print each element of the set on its own line.
print(*b, sep="\n")

HTML

<!DOCTYPE html>
<!-- The page text is Chinese, so the document language is declared as zh-CN. -->
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!-- Heading elements at levels 1, 2 and 5 -->
<h1>haha</h1>
<h2>haha</h2>
<h5>haha</h5>
无序列表
<!-- ul: unordered (bulleted) list -->
<ul>
    <li>肖申克救赎</li>
    <li>霸王别姬</li>
</ul>
<!-- ol: ordered (numbered) list -->
<ol>
    <li>肖申克救赎</li>
    <li>霸王别姬</li>
</ol>
<!-- div: a generic container -->
<div>
    div无色无味的容器
</div>
href 是a标签的一个属性
<!-- a: hyperlink; href holds the target URL -->
<a href="https://www.baidu.com">点击跳转至百度</a>
<!-- img: embedded image; src is the image URL, alt is the text alternative -->
<img src="https://ss0.bdstatic.com/70cFuHSh_Q1YnxGkpoWK1HF6hhy/it/u=2062164223,3783917881&fm=26&gp=0.jpg" alt="">

</body>
</html>

Python爬虫

requests库

获取远程html源代码

import requests

# Fetch the Baidu home page and save its HTML source to a local file.
url = "https://www.baidu.com/"
# Always pass a timeout: without one, requests can wait indefinitely
# if the server never responds.
response = requests.get(url, timeout=10)
# Encoding requests picked for decoding the body
print(response.encoding)
# Override it so that response.text is decoded as UTF-8
response.encoding = 'utf-8'
print(response.encoding)
# HTTP status code of the response
print(response.status_code)
# Decoded HTML source
print(response.text)
# Save the page to a local file
with open('baidu.html', mode='w', encoding='utf-8') as f:
    f.write(response.text)

douban

import requests

# Fetch the Douban Top 250 movie page and save its HTML source to a local file.
url = "https://movie.douban.com/top250"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}
# Send a browser-like User-Agent header so the request looks like it comes
# from a browser. Always pass a timeout: without one, requests can wait
# indefinitely if the server never responds.
response = requests.get(url, headers=headers, timeout=10)
# Encoding requests picked for decoding the body
print(response.encoding)
# HTTP status code of the response
print(response.status_code)
# Decoded HTML source
print(response.text)
# Save the page to a local file
with open('豆瓣.html', mode='w', encoding='utf-8') as f:
    f.write(response.text)