from selenium import webdriver
import re
# Open the Baidu home page in Firefox, collect every http(s) URL that appears
# in the page source, print the list and save it to D:\data.txt.
url = "http://www.baidu.com/"
brower = webdriver.Firefox()
try:
    brower.get(url)
    pagesoures = brower.page_source  # grab the page source
finally:
    # Always end the WebDriver session, even when loading the page fails,
    # so no browser/driver process is left behind.
    brower.quit()

# Every group in the pattern is non-capturing, so findall() returns the whole
# matched URL strings rather than group contents.
# NOTE(review): "$-_" inside the character class is a range from "$" to "_",
# not three literal characters - confirm that is intended.
restr = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
rex = re.compile(restr, re.IGNORECASE)
mylist = rex.findall(pagesoures)
print(mylist)

# Raw string keeps the backslash literal ("\d" is an invalid escape sequence
# in a normal string literal); the with block closes the file on exit.
with open(r"D:\data.txt", 'w+') as data:
    print(mylist, file=data)
python读入写出
Python 常用的读取文件方法有三种:read()、readline()、readlines()
read() 一次性读全部内容
# read(): load the entire contents of test.txt as a single string.
with open("test.txt", "r") as f:  # open the file; closed when the block exits
    data = f.read()  # read the whole file
    print(data)
readline() 每次读取一行;打开文件后第一次调用读到的是第一行内容
# readline(): read a single line; the first call after opening returns the
# first line of test.txt (including its trailing newline, if any).
with open("test.txt", "r") as f:
    data = f.readline()
    print(data)
readlines() 读取所有行,返回一个列表(每一行是列表中的一个元素)
# readlines(): read every line of test.txt into a list of strings.
with open("test.txt", "r") as f:
    data = f.readlines()
    print(data)
readlines() 返回的每个元素末尾都带有换行符,直接打印会多出空行;可以用 strip('\n') 去掉:
# Print test.txt line by line without the trailing newline of each line.
with open("test.txt", "r") as f:
    # Iterating the file object yields the same lines as readlines() without
    # building the whole list first.
    for line in f:
        line = line.strip('\n')  # drop the newline character(s) at the ends
        print(line)
write
# write(): create or truncate test.txt and write one string to it.
# The with statement closes the file automatically when the block exits,
# so no explicit f.close() is needed.
with open("test.txt", "w") as f:
    f.write("这是个测试!")
用 print 写入文件(通过 file 参数)
# Write a line to D:\data.txt by passing the file object to print(file=...).
# Raw string keeps the backslash literal ("\d" is an invalid escape sequence
# in a normal string literal); the with block closes the file on exit.
with open(r"D:\data.txt", 'w+') as data:
    print('这是个测试', file=data)
网友评论