使用 Python 抓取喜馬拉雅音樂並下載

2021-09-24 08:10:48 字數 2207 閱讀 2044

#

#更改後面網址以對應欲爬取內容

import requests

from lxml import etree

class spider(object):
    """Scrape an album listing page, let the user pick an album, and download it.

    Typical flow: ``getreponce`` fetches a page, ``main`` lists the albums
    found on it and returns the chosen album id, ``getjson`` extracts track
    names and audio URLs from a second response, and ``download`` saves the
    audio files to disk.
    """

    def __init__(self):
        # getreponce() and download() both read self.headers, so it must exist.
        # NOTE(review): the header values originally used are not visible in
        # this file; a generic browser User-Agent is assumed -- adjust if the
        # site requires more.
        self.headers = {'User-Agent': 'Mozilla/5.0'}

    def getreponce(self, url):
        """Issue a GET request for *url* with the instance headers and return the response."""
        reponce = requests.get(url, headers=self.headers)
        return reponce

    def main(self, reponce, id):
        """List the albums found in *reponce* and return the id of the chosen one.

        Each matching ``<a>`` element is printed with a running number that
        starts at ``id + 1``; the user is then prompted for one of those
        numbers.

        Args:
            reponce: response object whose ``.text`` holds the listing HTML.
            id: number preceding the first listed album (the caller passes 0).

        Returns:
            The second-to-last ``/``-separated segment of the chosen album's
            href, or ``""`` if anything goes wrong (a message is printed).
        """
        try:
            hrefs = []
            htmls = etree.HTML(reponce.text)
            datas = htmls.xpath("//a[@class='album-title line-1 lg bold _kc']")
            first_number = id + 1
            for data in datas:
                tits = data.xpath('./@title')
                href = data.xpath('./@href')
                if not href:
                    # Nothing to select for an anchor without a link.
                    continue
                id += 1
                hrefs.append(href[0])
                # Show the number the user has to type next to the album title.
                print(id, tits[0] if tits else href[0])

            idnum = int(input('輸入欲抓取該音樂整套輯的id:'))
            index = idnum - first_number
            # Reject out-of-range input explicitly; a negative index would
            # otherwise silently select an album from the end of the list.
            if not 0 <= index < len(hrefs):
                raise IndexError(idnum)
            # presumably hrefs look like ".../<album id>/", which makes the id
            # the second-to-last segment -- verify against the live page.
            return hrefs[index].split('/')[-2]
        except Exception as f:
            # str() is required: concatenating the exception object itself
            # raises TypeError and hides the real error.
            print('拼音內容可能有誤,或無法對應該拚音,請重新執行' + str(f))
            return ""

    def getjson(self, reponce):
        """Extract audio sources and track names from the text of *reponce*.

        Returns:
            A ``(src, trackname)`` tuple of two lists of raw matches. Values
            keep their surrounding double quotes, and a value containing a
            comma is cut at the first comma.
        """
        import re

        body = reponce.text
        # NOTE(review): other identifiers in this file were visibly case-folded
        # (etree.html, exception, false), so the key's real casing is
        # uncertain; match it case-insensitively.
        trackname = re.findall(r'"trackname":(.*?),', body, re.IGNORECASE)
        src = re.findall(r'"src":(.*?),', body)
        print(trackname)
        return src, trackname

    def download(self, src, trackname):
        """Download every URL in *src* into ``<cwd>/自定義`` as ``<name>.m4a``.

        *src* and *trackname* are paired positionally; surrounding double
        quotes are stripped from both. Entries whose source is not an
        http(s) URL are skipped. Waits for Enter before returning.
        """
        import os

        # Create the output folder under the current working directory.
        paths = os.path.join(os.getcwd(), '自定義')
        print(paths)
        if not os.path.exists(paths):
            print('建立資料夾')
            os.mkdir(paths)

        for m4a, name in zip(src, trackname):
            link = m4a.strip('"')
            name = name.strip('"')
            if not link.startswith(('http://', 'https://')):
                # e.g. a JSON null: requests cannot fetch it, and one bad
                # entry should not abort the remaining downloads.
                print(name, '無下載位址,略過')
                continue
            music = requests.get(link, headers=self.headers)
            # os.path.join instead of a hard-coded "\\" keeps this portable.
            with open(os.path.join(paths, name + '.m4a'), 'wb') as f:
                f.write(music.content)
            print(name, '下載完畢')

        print('done')
        # Keep the console open until the user presses Enter.
        input('等待')

def pini(music):
    """Turn *music* into a pinyin path segment.

    The text is run through ``pypinyin.lazy_pinyin``; the resulting pieces
    are concatenated without a separator and a trailing ``"/"`` is appended.
    Returns ``""`` when ``lazy_pinyin`` yields nothing.
    """
    from pypinyin import lazy_pinyin

    syllables = lazy_pinyin(music)
    if not syllables:
        return ""
    return ''.join(syllables) + "/"

if __name__ == '__main__':
    # Script entry point: ask for a category, list its albums, let the user
    # pick one, then download that album's tracks.

    # NOTE(review): both URL templates are empty in this file as received --
    # the scheme/host/path portion appears to be missing. Fill them in before
    # running; as written the album template ignores the album id entirely
    # and every request is made against an incomplete URL.
    category_url_template = "{}"
    album_url_template = ""

    idnum = 0

    music = input('請輸入音樂爬取的音樂總類,不輸入表示音樂全部種類:')

    # Use fresh names for the results: rebinding `pini` and `spider` to their
    # own return values would make the function and the class unreachable.
    category_path = pini(music)
    url = category_url_template.format(category_path)

    crawler = spider()
    reponce = crawler.getreponce(url)

    album_id = crawler.main(reponce, idnum)
    if not album_id:
        # main() has already printed the reason and returned "".
        raise SystemExit(1)

    idnumber_url = album_url_template.format(album_id)
    src, trackname = crawler.getjson(crawler.getreponce(idnumber_url))
    crawler.download(src, trackname)

喜馬拉雅 音訊爬取

import requests import parsel headers 介面的 url 每一集 都有相對應的 id 的引數值 def down mp4 player,mp4 name resp requests.get player,headers headers path r c users ...

Python爬蟲 喜馬拉雅音訊爬取

爬取喜馬拉雅三國中的前十章音訊 匯入 requests 模組 import requests 匯入正規表示式 import re 解決反爬問題,匯入 UA header 網頁原始碼中獲取的前十章 id sound ids 64686514,64689648,64695831,64695832,3218935,...

Python爬蟲實戰案例之爬取喜馬拉雅音訊資料詳解

前言:喜馬拉雅是專業的音訊分享平台,匯集了有聲小說、有聲讀物、有聲書、FM 電台、兒童睡前故事、相聲小品、鬼故事等數億條音訊,我最喜歡聽民間故事和德雲社相聲集,你呢?今天帶大家爬取喜馬拉雅音訊資料,一起期待吧!這個案例的程式碼位址在這裡。專案目標:爬取喜馬拉雅音訊資料。受害者位址。本文知識點:1. 系統分析網頁...