__author__ = 'shixq'
# source:https://www.jianshu.com/p/b36adf266c3d
from Bio import Entrez
# NCBI asks every E-utilities client to identify itself with a contact
# address; replace this placeholder mailbox with your own.
Entrez.email = "2576755886@qq.com"

# Read one PMID per line. Blank lines are skipped so that no request is
# issued with an empty id. Entries starting with "NBK" should be removed
# from the input beforehand (per the original author's note).
with open('PMID.txt') as pmid_file:
    id_list = [pid for pid in (line.strip() for line in pmid_file) if pid]

total = len(id_list)

# Write one tab-separated record per PMID: "<pmid>\t<abstract page text>".
with open('PMID_abstract.txt', 'w', encoding='utf-8') as output:
    for done, pid in enumerate(id_list, start=1):
        # efetch returns the plain-text abstract page for this PMID.
        handle = Entrez.efetch(db="pubmed", id=pid, rettype="abstract", retmode="text")
        try:
            record_lines = [line.strip() for line in handle.readlines()]
        finally:
            # Release the HTTP connection even if reading fails.
            handle.close()

        # Collapse the page onto a single line. Joining with a space (and
        # dropping empty lines) keeps the last word of one wrapped line from
        # being fused with the first word of the next.
        flat_text = ' '.join(part for part in record_lines if part)
        output.write(pid + '\t' + flat_text + '\n')

        # Report progress on screen as a percentage of PMIDs processed.
        print('complete', '%.1f%%' % ((done / total) * 100))