from selenium import webdriver
from bs4 import BeautifulSoup
import pymongo
import xlwt
# main function entry:
if __name__ == '__main__':
URL = "http://typhoon.weather.com.cn/gis/typhoon_p.shtml"
browser = webdriver.Firefox() # 创建浏览器对象
browser.get(URL) # 打开页面,并加载内容,渲染对象
response = browser.page_source # 获取页面的html源码
sourceData = response.encode()
fobj = open("data2.txt", 'wb')
fobj.write(sourceData)
fobj.close()
f = open("./data2.txt", encoding='UTF-8')
lines = f.readlines()
#print(str(lines))
soup = BeautifulSoup( str(lines),'lxml')
    #print(soup.prettify())
    # print only the first span tag's content; .string gives the text inside a tag
    #print(soup.span.string)
    # .contents / .children give all child nodes of a node
    #print(soup.span.contents)
    # .next_siblings / .previous_siblings give the nodes before and after a node
    #print(soup.span.next)
    #print(soup.span.previous)
    # find_all returns every matching tag
    #print(soup.find_all('span')[163].string)
    #print(len(soup.find_all('span')))
    #print(soup.find_all('span')[0].string[0])
    #print(len(soup.find_all('span'))/4)
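    # A quick self-contained illustration of the BeautifulSoup calls noted above.
    # The tiny HTML string below is a made-up example, not part of the typhoon page.
    demo = BeautifulSoup('<div><span>first</span><span>second</span></div>', 'lxml')
    print(demo.span.string)                 # 'first'   -> .string: text inside the first span
    print(demo.span.contents)               # ['first'] -> .contents: list of child nodes
    print(demo.find_all('span')[1].string)  # 'second'  -> find_all: every matching tag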
    # build lists and add the column headers
    a = []
    b = []
    c = []
    a.append('时间')         # time
    a.append('经纬度')       # longitude / latitude
    a.append('气压(hPa)')    # pressure (hPa)
    a.append('风速(m/s)')    # wind speed (m/s)
    # fill the list with every record that passes the initial screening
    spans = soup.find_all('span')
    for i in range(len(spans) // 4):
        #if spans[i * 4].string[0] == True:
        #if spans[i * 4].string[0].equal(2):
        a.append(spans[i * 4].string)
        a.append(spans[i * 4 + 1].string)
        a.append(spans[i * 4 + 2].string)
        a.append(spans[i * 4 + 3].string)
        #print(i)
    # save the column headers
    c.append(a[0])
    c.append(a[1])
    c.append(a[2])
    c.append(a[3])
    # final pass: drop leading entries until only the required typhoon records remain
    for j in range(len(a) // 4):
        #print('j', j)
        #print(a[0])
        if a[0]:
            #print(111)
            #print('a[j][0]///////', a[j][0])
            if a[0][0] != '2':
                #print(222)
                a.pop(0)
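
The script imports xlwt and pymongo but never writes the filtered records anywhere. Below is a minimal sketch, assuming the lists c (headers) and a (filtered records) built above, of how the data could be saved to an .xls workbook with xlwt; the function name save_to_xls, the sheet name and the output file typhoon.xls are placeholders, not part of the original script.

import xlwt

def save_to_xls(headers, records, path="typhoon.xls"):
    # create a workbook and one sheet; the names here are placeholders
    wb = xlwt.Workbook(encoding='utf-8')
    ws = wb.add_sheet('typhoon')
    # first row: the column headers collected in c
    for col, title in enumerate(headers):
        ws.write(0, col, title)
    # remaining rows: one typhoon record per row, 4 fields per record
    for row in range(len(records) // 4):
        for col in range(4):
            ws.write(row + 1, col, records[row * 4 + col])
    wb.save(path)

# e.g. called at the end of the __main__ block: save_to_xls(c, a)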