#python -m ensurepip
from Bio import Entrez
from Bio import Medline
import csv
import numpy as np
from PIL import Image #pip install pillow
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator #pip install wordcloud
import matplotlib.pyplot as plt
# Retrieval
# Entrez client settings
Entrez.email = "abc@zju.edu.cn"
Entrez.max_tries = 6
# NOTE(review): Biopython reads this value in seconds, so 500 means a wait of
# more than eight minutes between retries -- confirm that this is intended.
Entrez.sleep_between_tries = 500
# NOTE(review): the API key is hard-coded; consider loading it from an
# environment variable before sharing or publishing this script.
Entrez.api_key = "1620334c97028769f1779fa9078fac18f108"

# Search PubMed
paper_counts = 1000  # maximum number of PubMed IDs requested from esearch
serch_term = 'Lachnospiraceae'  # search term (e.g. a word extracted from an Excel sheet)
hd_esearch = Entrez.esearch(db="pubmed", term=serch_term, retmax=paper_counts, usehistory="y")
try:
    read_esearch = Entrez.read(hd_esearch)
finally:
    # Close the handle even when parsing the response fails.
    hd_esearch.close()
eftech_id_list = read_esearch["IdList"]
efetch_step = 1000  # number of records requested per efetch call

# Accumulators filled by the download loop that follows.
articleInfo = []  # one dict per article, later written to the CSV file
wordcloud_abstract = ""
wordcloud_keywords = ""
wordcloud_jounals = ""
# Download the MEDLINE records batch by batch and collect one row per article.
max_fetch_attempts = 5  # give up on a batch after this many unusable responses
# The search may return fewer IDs than were requested, so bound the loop by
# what is actually available instead of by paper_counts alone.
id_total = min(paper_counts, len(eftech_id_list))
for efetch_start in range(0, id_total, efetch_step):
    efetch_end = min(efetch_start + efetch_step, id_total)
    print("Download records %i to %i" % (efetch_start + 1, efetch_end))
    # Request only this batch's IDs rather than the whole list plus an offset.
    batch_ids = eftech_id_list[efetch_start:efetch_end]
    n = 2  # number of the next attempt, shown in the retry message
    while True:
        head_efetch = Entrez.efetch(db="pubmed", id=batch_ids, rettype="medline",
                                    retmode="text", retmax=efetch_step)
        try:
            records = list(Medline.parse(head_efetch))
        finally:
            # Release the connection whether or not parsing succeeded.
            head_efetch.close()
        # A response counts as usable when it holds at least one record and
        # the first record carries a title.
        if records and "TI" in records[0]:
            break
        if n > max_fetch_attempts:
            raise RuntimeError(
                "efetch returned no usable records for IDs %i to %i after %i attempts"
                % (efetch_start + 1, efetch_end, max_fetch_attempts)
            )
        print(" fatal error", "第", n, "次尝试")
        n = n + 1
    for record in records:
        journal = record.get("JT", "?")
        abstract = record.get("AB", "?")
        # "OT" (author keywords) is normally a list; join it directly instead
        # of stripping characters from its repr, which would also remove
        # apostrophes and brackets that belong to the keywords themselves.
        raw_keywords = record.get("OT", "?")
        if isinstance(raw_keywords, str):
            keywords = raw_keywords
        else:
            keywords = ", ".join(str(keyword) for keyword in raw_keywords)
        articleInfo.append({
            'pubmed_id': record.get("PMID", "?"),
            'journal': journal,
            'title': record.get("TI", "?"),
            'keywords': keywords,
            'abstract': abstract,
            # "DP" (publication date) starts with the four-digit year, e.g.
            # "2021 Mar 15"; this fills the 'year' column of the CSV header.
            'year': str(record.get("DP", "?"))[:4],
        })
        wordcloud_abstract = wordcloud_abstract + "\n" + abstract
        wordcloud_keywords = wordcloud_keywords + "\t" + keywords
        wordcloud_jounals = wordcloud_jounals + "\n" + journal
# Render one word cloud per collected text and save each as a PNG file.
# NOTE(review): wordcloud_keywords is accumulated during the download, but no
# keyword cloud is rendered here -- confirm whether one is wanted.
wordcloud_output_dir = 'C:\\Users\\Mr.R\\Desktop\\1\\'
for cloud_label, cloud_text in (("journals", wordcloud_jounals),
                                ("abstract", wordcloud_abstract)):
    try:
        cloud_image = WordCloud().generate(cloud_text)
    except ValueError as err:
        # WordCloud raises ValueError when the text contains no usable words
        # (for example when nothing was downloaded). Skip this image so the
        # remaining outputs of the script are still produced.
        print("Skipping %s word cloud: %s" % (cloud_label, err))
        continue
    plt.figure(figsize=(20, 30))
    plt.imshow(cloud_image, interpolation='bilinear')
    plt.axis("off")
    plt.savefig(wordcloud_output_dir + serch_term + "_" + cloud_label + ".png",
                dpi=300, bbox_inches='tight')
    plt.close()  # free the figure once it has been written to disk
# Write the collected article rows to a CSV file.
file_name = 'C:\\Users\\Mr.R\\Desktop\\1\\' + serch_term + "4.csv"
# Rows that lack one of these keys get an empty cell for that column
# (DictWriter's default restval).
field_names = ['pubmed_id', 'title', 'keywords', 'journal', 'abstract', 'year']
# newline='' lets the csv module control line endings; without it every row is
# followed by a blank line on Windows. An explicit UTF-8 encoding (with BOM so
# that Excel detects it) avoids UnicodeEncodeError when an abstract contains
# characters missing from the locale's default code page.
with open(file_name, 'w', newline='', encoding='utf-8-sig') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(articleInfo)
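# NOTE: the original page ended with web-scrape residue that is not part of
# the script: two image placeholders ("image.png", "image.png") and a
# "Reader comments" heading ("网友评论").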