import time
import requests
from lxml import etree
from selenium import webdriver
from kaisha import str2url
from threading import Thread
# Shared Chrome WebDriver instance, created as soon as this module is loaded;
# get_page() drives it to fetch the chart page.
# NOTE(review): the chromedriver path is an absolute, machine-specific
# location — confirm it exists on the machine running this script.
browser = webdriver.Chrome(executable_path="/Users/apple/Desktop/tool/chromedriver")
def get_page():
    """Open the Xiami chart page in the shared browser and return its HTML.

    Returns:
        The browser's current page source after a fixed one-second pause.
    """
    browser.get('https://www.xiami.com/chart')
    # Fixed pause before reading the source (presumably to let the page
    # finish rendering — the code does not wait on any specific element).
    time.sleep(1)
    page_html = browser.page_source
    return page_html
def get_mp3(url, timeout=30):
    """Download the resource at *url* and return its raw bytes.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server (connect / between bytes)
            before giving up. Without a timeout ``requests`` waits
            indefinitely, which would leave a worker thread hanging on a
            stalled server.

    Returns:
        The response body as ``bytes`` when the status code is 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On connection errors or when the
            timeout expires.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.content
    return None
def process_mp3(mp3_url, mp3_title):
    """Resolve, download and store a single track.

    Args:
        mp3_url: Value passed through ``str2url`` before downloading
            (presumably an encoded form of the real URL — verify against
            the ``kaisha`` module).
        mp3_title: Track title; used for logging and as the file name.
    """
    mp3_url = str2url(mp3_url)
    print(mp3_url, mp3_title)
    mp3_content = get_mp3(mp3_url)
    if mp3_content is None:
        # get_mp3 returns None for a non-200 response; writing None would
        # create an empty file and then fail with TypeError, so skip it.
        print('download failed, skipping:', mp3_title)
        return
    save_mp3(mp3_content, mp3_title)
def save_mp3(mp3_content, mp3_title):
    """Write *mp3_content* to ``./mp3/<mp3_title>.mp3``.

    Args:
        mp3_content: Raw bytes to write.
        mp3_title: Track title used as the file name. Path separators in
            the title are replaced with ``_`` so the file always lands
            directly inside the ``./mp3`` directory.
    """
    import os

    # The output directory may not exist on a fresh checkout.
    os.makedirs('./mp3', exist_ok=True)

    safe_title = str(mp3_title)
    for separator in (os.sep, os.altsep):
        if separator:
            # A separator in the title would otherwise point into a
            # (usually nonexistent) subdirectory.
            safe_title = safe_title.replace(separator, '_')

    with open('./mp3/%s.mp3' % safe_title, 'wb') as f:
        f.write(mp3_content)
def parse_page(html):
    """Extract every song row from *html* and download each in its own thread.

    Rows are ``<tr class="songwrapper">`` elements; the download source and
    title are read from their ``data-mp3`` and ``data-title`` attributes.
    Threads are started but not joined, so this function returns while
    downloads may still be running.

    Args:
        html: HTML text of the chart page.
    """
    etree_html = etree.HTML(html)
    items = etree_html.xpath('//tr[@class="songwrapper"]')
    threads = []
    for item in items:
        mp3_urls = item.xpath('./@data-mp3')
        mp3_titles = item.xpath('./@data-title')
        if not mp3_urls or not mp3_titles:
            # xpath returns an empty list when the attribute is absent;
            # skip such rows instead of aborting the whole page.
            continue
        thread = Thread(target=process_mp3, args=(mp3_urls[0], mp3_titles[0]))
        threads.append(thread)
    # Start the workers only after the whole page parsed successfully.
    for thread in threads:
        thread.start()
def main():
    """Fetch the chart page and pass its HTML to the parser."""
    parse_page(get_page())
# Run the scraper only when this file is executed as a script. The module
# name lives in the dunder variable __name__ and equals '__main__' then.
if __name__ == '__main__':
    main()