from bs4 import BeautifulSoup # pip install beautifulsoup4
import requests
import time
import random
def run(
    page_url="http://www7b.biglobe.ne.jp/~browneye/english/TOEIC400-1.htm",
    output_path="toeic_words.txt",
):
    """Scrape a table-based word list page and save two columns as text.

    The page is downloaded, the text of every ``<td>`` element is collected
    in document order, and the cells are grouped four at a time. For each
    kept group, the second and third cells are written to ``output_path`` as
    one ``second,third`` line.

    Args:
        page_url: URL of the page to scrape. Defaults to the original
            hard-coded TOEIC word list page.
        output_path: File to (over)write with the extracted pairs.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be written.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(page_url, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the table.
    response.raise_for_status()
    # Let requests guess the charset from the body rather than trusting
    # whatever the HTTP headers declare.
    response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, features="html.parser")
    cell_texts = [cell.text for cell in soup.find_all("td")]

    word_rows = []
    for start in range(0, len(cell_texts), 4):
        row = cell_texts[start:start + 4]
        # A short trailing group cannot supply both output columns; skip it
        # rather than crash with IndexError while writing.
        if len(row) < 3:
            continue
        # Groups whose first cell is a lone ideographic space (U+3000) are
        # skipped -- presumably spacer/empty table rows; confirm against page.
        if row[0] == '\u3000':
            continue
        word_rows.append(row)

    # Explicit UTF-8 so non-ASCII cell text is written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines("{},{}\n".format(row[1], row[2]) for row in word_rows)

    print("Yes, done.")
if __name__ == "__main__":
    # Only scrape when executed as a script, not when imported as a module.
    run()
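# "Reader comments" -- appears to be a leftover heading from the web page this snippet was copied from; not part of the script.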