流程

代码

+ python代码

# 采集歌曲宝(https://www.gequbao.com/)数据

import os
import re
from urllib.parse import quote

import parsel
import requests
from prettytable import PrettyTable

"""1. 发送请求"""
# Request headers that imitate a desktop browser.
# The User-Agent value is the basic browser/device identity string that is
# sent along with every request made by this script.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0'
    ),
}
# Seconds to wait for the server before a request is abandoned; without a
# timeout a stalled connection would block the prompt forever.
_REQUEST_TIMEOUT = 15
# Directory (relative to the working directory) where songs are saved.
_SAVE_DIR = 'music'
# Characters that cannot appear in a Windows file name, plus control characters.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced by '_'."""
    cleaned = _INVALID_FILENAME_CHARS.sub('_', name).strip(' .')
    return cleaned or 'untitled'


def _search_songs(keyword):
    """Query the site's search page and return the songs listed for *keyword*.

    Returns a list of dicts with the keys 'ID', 'singer' and 'title'.
    Raises requests.RequestException when the request fails or the server
    answers with an HTTP error status.
    """
    # Percent-encode the keyword so that '/', '?' or '#' typed by the user
    # cannot change the structure of the URL.
    search_url = f'https://www.gequbao.com/s/{quote(keyword, safe="")}'
    response = requests.get(url=search_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    selector = parsel.Selector(response.text)

    songs = []
    # Same slice as before: the first two and the last '.row' elements are
    # skipped and only the rows in between are treated as results.
    for row in selector.css('.row')[2:-1]:
        href = row.css('.music-link::attr(href)').get()
        if not href:
            # A row without a song link cannot be downloaded; skip it
            # instead of crashing on None.
            continue
        songs.append({
            'ID': href.rstrip('/').split('/')[-1],
            'singer': (row.css('.text-jade::text').get() or '').strip(),
            'title': (row.css('.text-primary span::text').get() or '').strip(),
        })
    return songs


def _download_song(song_id, fallback_title):
    """Download the song identified by *song_id* into the save directory.

    The file is named after the title found on the song page, or after
    *fallback_title* when the page does not contain one. Returns the title
    that was used.
    Raises requests.RequestException on network/HTTP errors, ValueError when
    the page or the API response lacks the expected data, and OSError when
    the file cannot be written.
    """
    # Song page: contains the play_id required by the play-url API.
    page_url = f'https://www.gequbao.com/music/{song_id}'
    page = requests.get(url=page_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    page.raise_for_status()
    html = page.text

    play_id_match = re.search(r'"play_id":"(.*?)"', html)
    if play_id_match is None:
        raise ValueError('歌曲页面中未找到 play_id')
    title_match = re.search(
        r'<h1 class="badge badge-light-green" style="max-width: 60%;">(.*?)</h1>',
        html,
    )
    title = title_match.group(1) if title_match else fallback_title

    # Ask the API for the real audio address.
    api_response = requests.post(
        url='https://www.gequbao.com/api/play-url',
        data={'id': play_id_match.group(1)},
        headers=headers,
        timeout=_REQUEST_TIMEOUT,
    )
    api_response.raise_for_status()
    try:
        music_url = api_response.json()['data']['url']
    except (KeyError, TypeError) as exc:
        raise ValueError('接口未返回歌曲链接') from exc
    if not music_url:
        raise ValueError('接口未返回歌曲链接')

    # Fetch the audio bytes; raise_for_status keeps an error page from
    # being written to disk as an .mp3 file.
    audio = requests.get(url=music_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    audio.raise_for_status()

    # Create the target directory on demand and build the path portably
    # instead of hard-coding the Windows separator.
    os.makedirs(_SAVE_DIR, exist_ok=True)
    target = os.path.join(_SAVE_DIR, _safe_filename(title) + '.mp3')
    with open(target, 'wb') as f:
        f.write(audio.content)
    return title


# Interactive loop: search -> show results -> pick one -> download.
# Every failure is reported and the loop returns to the search prompt
# instead of terminating the session with a traceback.
while True:
    key = input('请输入你要下载的歌手/歌名(00退出): ').strip()
    if key == '00':
        break
    if not key:
        # Nothing typed: ask again rather than requesting an empty search.
        continue

    try:
        info = _search_songs(key)
    except requests.RequestException as exc:
        print(f'搜索失败: {exc}')
        continue
    if not info:
        print('没有找到相关歌曲, 请重新输入。')
        continue

    # Show the results; the displayed number is the 0-based list index.
    tb = PrettyTable()
    tb.field_names = ['序号', '歌手', '歌名']
    for index, song in enumerate(info):
        tb.add_row([index, song['singer'], song['title']])
    print(tb)

    num = input('请输入你想要下载的歌曲序号: ')
    try:
        chosen = int(num)
    except ValueError:
        chosen = -1
    # Reject negative numbers too: they would otherwise silently index
    # from the end of the list.
    if not 0 <= chosen < len(info):
        print('序号无效, 请输入表格中的序号。')
        continue

    try:
        title = _download_song(info[chosen]['ID'], info[chosen]['title'])
    except (requests.RequestException, ValueError, OSError) as exc:
        print(f'下载失败: {exc}')
        continue
    print(f'{title}, 下载完成!!!')