元组、字典、集合
tuple
tuple 和 list 很像, 只是创建后不能增删元素, 也不能对元素重新赋值
#
# Tuple basics: creation, indexing, immutability, iteration.
# A tuple literal may mix element types, including a nested list.
a = ("小鲁班", "李元芳", 100, 99.0, [333, 444])
print(a)
print(type(a))
print(a[0])  # elements are read by index, as with a list

# Item assignment is rejected:
# a[0] = "haha"  # TypeError: 'tuple' object does not support item assignment
# Many functions return their results as a tuple.

# A one-element tuple is written with a trailing comma.
b = (100,)
print(type(b))

# Iterating a tuple yields its elements in order.
for v in a:
    print(v)
dict
# dict basics: each key maps to a value (key ---> value).
hero_info = {"name": "鲁班七号", "grade": 13, "skill": "无敌鲨鱼炮"}
print(hero_info)
print(type(hero_info))

# Lookup: index by key to obtain the value.
print(hero_info["name"])
print(hero_info["skill"])
# print(hero_info["money"])  # KeyError: 'money'

# .get() returns a fallback for a missing key instead of raising.
print(hero_info.get("name", "haha"))  # key exists, so the default is ignored
print(hero_info.get("money"))  # None
print(hero_info.get("money", 200))  # 200

# Update the value stored under an existing key.
hero_info["grade"] = 14
print(hero_info)

# Add a new key/value pair.
hero_info["money"] = 10000
print(hero_info)

# Delete one entry by key; "del hero_info" would remove the variable itself.
# del hero_info
del hero_info["grade"]
print(hero_info)

# Rebuild the dict so the inspection examples below start from a known state.
hero_info = {"name": "鲁班七号", "grade": 13, "skill": "无敌鲨鱼炮"}

# Number of entries.
print(len(hero_info))
# All keys.
print(list(hero_info.keys()))
# All values.
print(list(hero_info.values()))
# All (key, value) pairs.
print(list(hero_info.items()))

# Iterate over key/value pairs.
for k, v in hero_info.items():
    print(k, "---->", v)
text = "You Are My Sunshine The other night dear as I lay sleeping I dreamed I held you in my arms When I aw You Are My Sunshine The other night dear as"

# Word-frequency count: build a mapping such as {"Sunshine": 2, "other": 2, ...}
wl = text.split()  # split on whitespace; matching is case-sensitive
print(wl)

counts = {}
for word in wl:
    # counts[word] = counts[word] + 1 would raise KeyError the first time a
    # word is seen, so .get() supplies 0 for words not counted yet.
    counts[word] = counts.get(word, 0) + 1
print(counts)
set
# set basics: unordered, no duplicate elements.
a = {112, 3, 4, 5}
print(a)
print(type(a))

# Repeated values in the literal collapse into a single element.
b = {222, 666, 666, 666}
print(b)

# print(b[0])  # TypeError: a set cannot be indexed

# A set can still be iterated; the order is not something to rely on.
for x in b:
    print(x)
HTML
<!DOCTYPE html>
<!-- Demo page: headings, unordered/ordered lists, a div, a link and an image. -->
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <!-- Headings at three different levels -->
    <h1>haha</h1>
    <h2>haha</h2>
    <h5>haha</h5>

    <!-- Unordered list -->
    无序列表
    <ul>
        <li>肖申克救赎</li>
        <li>霸王别姬</li>
    </ul>

    <!-- Ordered list with the same items -->
    <ol>
        <li>肖申克救赎</li>
        <li>霸王别姬</li>
    </ol>

    <!-- Generic container element -->
    <div>
        div无色无味的容器
    </div>

    <!-- Hyperlink: href holds the link target -->
    href 是a标签的一个属性
    <a href="https://www.baidu.com">点击跳转至百度</a>

    <!-- Image loaded from a remote URL -->
    <img src="https://ss0.bdstatic.com/70cFuHSh_Q1YnxGkpoWK1HF6hhy/it/u=2062164223,3783917881&fm=26&gp=0.jpg" alt="">
</body>
</html>
Python爬虫
requests库
获取远程html源代码
import requests
# Download the Baidu home page and save its HTML source locally.
url = "https://www.baidu.com/"
# Without a timeout the call can block indefinitely if the server never
# answers; with one, requests raises an exception instead.
response = requests.get(url, timeout=10)

# Inspect the encoding currently set on the response.
print(response.encoding)
# Override it so that response.text is decoded as UTF-8.
response.encoding = 'utf-8'
print(response.encoding)
print(response.status_code)
print(response.text)

# Write the page source to a local file.
with open('baidu.html', mode='w', encoding='utf-8') as f:
    f.write(response.text)
douban
import requests
# Download the Douban Top 250 page and save its HTML source locally.
url = "https://movie.douban.com/top250"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}
# Send a browser-like User-Agent header so the request looks like it comes
# from a browser. The timeout keeps the call from blocking indefinitely if
# the server never answers.
response = requests.get(url, headers=headers, timeout=10)

# Inspect the encoding set on the response.
print(response.encoding)
print(response.status_code)
print(response.text)

# Write the page source to a local file.
with open('豆瓣.html', mode='w', encoding='utf-8') as f:
    f.write(response.text)
网友评论