#coding=utf-8
import re,urllib2,os,urllib,requests
import webbrowser
def getHtmlCode(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the page to download.

    Returns:
        The body of the HTTP response exactly as read from the connection.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the request fails
        (for example ``urllib2.URLError``).
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even when read() fails; the script calls
        # this once per song, so leaked responses would otherwise pile up.
        response.close()
# Captures the href of the link that directly follows a `top-mid-title"`
# attribute value. Compiled once at import time instead of on every call.
_MIDI_LINK_RE = re.compile(r'top-mid-title"><a href="(.+?)"')


def getMidiUrl(htmlString):
    """Return the song-page links found in a listing page.

    Args:
        htmlString: HTML source of the listing page.

    Returns:
        A list with one href value per ``top-mid-title"><a href="..."``
        occurrence, in document order; empty when nothing matches.
    """
    return _MIDI_LINK_RE.findall(htmlString)
# Captures whatever follows the literal `getter` prefix in the href of the
# "Download MIDI" link. Compiled once because the function runs per song.
_DOWNLOAD_LINK_RE = re.compile(r'href="getter(.+?)" download>Download MIDI')


def getDownloadUrl(htmlString):
    """Return the download-link suffixes found in a song page.

    Args:
        htmlString: HTML source of a single song page.

    Returns:
        A list of the href fragments that come *after* the ``getter``
        prefix (the prefix itself is not included, so callers must put it
        back); empty when the page has no "Download MIDI" link.
    """
    return _DOWNLOAD_LINK_RE.findall(htmlString)
# Captures the text of every single-line <h1> element. Written as a raw
# string: the previous "<\/h1>" relied on an invalid string escape, and a
# plain "/" matches exactly the same text.
_HEADING_RE = re.compile(r"<h1>(.+?)</h1>")


def getTitleUrl(htmlString):
    """Return the contents of the ``<h1>`` headings in a page.

    Args:
        htmlString: HTML source of a single song page.

    Returns:
        A list with the inner text of each ``<h1>...</h1>`` pair, in
        document order; empty when the page has no such heading.
    """
    return _HEADING_RE.findall(htmlString)
# Position in the listing at which processing starts; earlier entries are
# skipped (presumably already handled by a previous run -- confirm).
START_INDEX = 121

# Command template passed to webbrowser.get(); "%s" is replaced by the URL.
# Raw string so the backslash before the space reaches webbrowser unchanged
# (same value as before, without relying on an invalid string escape).
CHROME_COMMAND = r'open -a /Applications/Google\ Chrome.app %s'


def main():
    """Open the download link of every listed MIDI file in Chrome.

    Fetches the top-MIDI listing, then for each entry from START_INDEX
    onwards fetches the song page, prints its numbered title and download
    URL, and hands that URL to the browser. Pages without a title or
    download link are reported and skipped instead of aborting the run.
    """
    site = 'https://freemidi.org/'
    listingHtml = getHtmlCode(site + 'topmidi')
    midis = getMidiUrl(listingHtml)

    # The browser controller does not depend on the song, so build it once.
    browser = webbrowser.get(CHROME_COMMAND)

    for i in range(START_INDEX, len(midis)):
        songPageUrl = site + midis[i]
        songHtml = getHtmlCode(songPageUrl)

        titles = getTitleUrl(songHtml)
        downloadUrls = getDownloadUrl(songHtml)
        if not titles or not downloadUrls:
            # Indexing [0] on an empty result would raise IndexError and
            # stop the whole run; skip just this entry instead.
            print('%d. skipped %s (title or download link not found)'
                  % (i + 1, songPageUrl))
            continue

        # Titles are numbered from 1, matching the listing position.
        title = str(i + 1) + '.' + titles[0]
        print(title)

        # getDownloadUrl() returns only the part after "getter".
        downloadUrl = site + 'getter' + downloadUrls[0]
        print(downloadUrl)

        browser.open(downloadUrl)
        # NOTE: the earlier, disabled alternative saved the file directly:
        #   urllib.urlretrieve(downloadUrl, "%s.mid" % ("midi/" + title))


if __name__ == '__main__':
    main()
# "网友评论" (reader comments) -- stray web-page text, not part of the script.