美文网首页
利用浏览器下载文件

利用浏览器下载文件

作者: Do_More | 来源:发表于2017-07-28 18:25 被阅读0次
    #coding=utf-8
    import re,urllib2,os,urllib,requests
    import webbrowser
    
    def getHtmlCode(url):
        """Fetch *url* and return the complete response body.

        The response object is always closed, even when reading fails, so
        the underlying connection is not left open.

        :param url: address handed unchanged to ``urllib2.urlopen``.
        :return: the value of ``response.read()`` for that address.

        Errors raised by ``urllib2.urlopen`` or by reading the response are
        not caught here and propagate to the caller.
        """
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # urllib2 responses are not context managers on Python 2, so
            # release the connection explicitly.
            response.close()
    
    def getMidiUrl(htmlString):
        """Collect the link targets that follow a ``top-mid-title`` marker.

        :param htmlString: HTML text to scan.
        :return: list with the captured ``href`` value of every match, in
            document order; empty when nothing matches.
        """
        linkPattern = 'top-mid-title"><a href="(.+?)"'
        return [found.group(1) for found in re.finditer(linkPattern, htmlString)]
    
    def getDownloadUrl(htmlString):
        """Find the ``getter`` link suffixes of "Download MIDI" anchors.

        Only the text that follows ``getter`` inside the ``href`` value is
        captured, so callers have to prepend the site's ``getter`` prefix.

        :param htmlString: HTML text to scan.
        :return: list of captured suffixes in document order; empty when
            nothing matches.
        """
        return re.findall(
            'href="getter(.+?)" download>Download MIDI',
            htmlString,
        )
    
    def getTitleUrl(htmlString):
        """Return the text of every single-line ``<h1>`` element.

        :param htmlString: HTML text to scan.
        :return: list of the captured heading contents in document order;
            empty when no ``<h1>...</h1>`` pair with content is found.
        """
        # Raw string pattern: "/" needs no backslash in a regular expression,
        # and a backslash-slash pair inside a normal string literal is an
        # invalid escape sequence that newer Python versions warn about.
        regTitleUrl = re.compile(r"<h1>(.+?)</h1>")
        return regTitleUrl.findall(htmlString)
    
    if __name__ == '__main__':
        # Script entry point: read the freemidi.org top list, visit each
        # entry's page, print its numbered title and download URL, and hand
        # the download URL to Chrome so the browser fetches the file.

        # Index of the first list entry to process.
        # NOTE(review): hard-coded to 121, presumably to resume an earlier
        # interrupted run -- confirm before reusing the script.
        startIndex = 121

        # The browser command is the same for every entry, so build the
        # controller once. Raw string keeps the backslash that escapes the
        # space in the application path.
        chrome_path = r'open -a /Applications/Google\ Chrome.app %s'
        browser = webbrowser.get(chrome_path)

        url = 'https://freemidi.org/topmidi'
        htmlCode = getHtmlCode(url)
        midis = getMidiUrl(htmlCode)
        for i, midiUrl in enumerate(midis[startIndex:], startIndex):
            url2 = 'https://freemidi.org/' + midiUrl
            htmlCode2 = getHtmlCode(url2)

            titles = getTitleUrl(htmlCode2)
            downloadUrls = getDownloadUrl(htmlCode2)
            if not titles or not downloadUrls:
                # A page without a heading or download link would otherwise
                # raise IndexError and abort every remaining entry.
                print('skip ' + url2 + ': title or download link not found')
                continue

            # Single-argument print calls behave the same on Python 2 and 3.
            title = str(i + 1) + '.' + titles[0]
            print(title)

            downloadUrl = 'https://freemidi.org/getter' + downloadUrls[0]
            print(downloadUrl)

            # NOTE: the file is fetched by the browser; a direct
            # urllib.urlretrieve download into "midi/<title>.mid" was left
            # disabled in this script.
            browser.open(downloadUrl)
    

    相关文章

      网友评论

          本文标题:利用浏览器下载文件

          本文链接:https://www.haomeiwen.com/subject/lduflxtx.html