流程

代码

+ 爬取漫客栈漫画
代码示例
import requests
import os

# Download every chapter image of one mkzhan.com comic into a local folder.
# Comic page: https://www.mkzhan.com/212062/932940.html

# Directory the images are written to (created on demand).
file = 'img'

# Comic whose chapters are downloaded; used by both API endpoints below.
COMIC_ID = '212062'

# Seconds to wait for each HTTP request; without a timeout a stalled
# connection would block the script indefinitely.
TIMEOUT = 15

# Characters replaced in chapter titles before they are used in file names
# (path separators plus the characters Windows rejects).
INVALID_FILENAME_CHARS = '\\/:*?"<>|'

# Pretend to be a desktop browser.
headers = {
    "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"
}

# Endpoint returning the chapter list of the comic.
link = 'https://comic.mkzcdn.com/chapter/v1/?comic_id=' + COMIC_ID
# Endpoint returning the image list of a single chapter.
url = 'https://comic.mkzcdn.com/chapter/content/v1/'


def safe_filename(name):
    """Return *name* with characters unsafe in a file name replaced by ``_``.

    Path separators, the characters listed in ``INVALID_FILENAME_CHARS`` and
    ASCII control characters are each replaced by a single underscore; every
    other character is kept as is.
    """
    return ''.join(
        '_' if ch in INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in str(name)
    )


def image_path(directory, title, number):
    """Return the path ``<directory>/<title><number>.jpg`` for one image.

    The title is sanitized with :func:`safe_filename` and the parts are joined
    with ``os.path.join`` so the result is valid on every platform.
    """
    return os.path.join(directory, safe_filename(title) + str(number) + '.jpg')


def fetch(target, params=None):
    """GET *target* with the shared headers and return the response.

    Raises ``requests.HTTPError`` on a 4xx/5xx status so that an error page is
    never mistaken for JSON or image data.
    """
    response = requests.get(url=target, headers=headers, params=params, timeout=TIMEOUT)
    response.raise_for_status()
    return response


def main():
    """Download all images of all chapters of ``COMIC_ID`` into ``file``."""
    os.makedirs(file, exist_ok=True)

    # Running image counter; it is not reset between chapters, so the numeric
    # suffix of each file is the overall image index.
    num = 1

    # Fetch the chapter list as JSON.
    link_json = fetch(link).json()
    print(link_json)

    for index in link_json['data']:
        # Chapter id and chapter title.
        chapter_id = index['chapter_id']
        title = index['title']
        print(title)

        # Query parameters for the chapter-content endpoint.
        # NOTE(review): 'sign' and 'uid' are fixed values taken as-is;
        # presumably captured from a browser session - confirm they stay valid.
        params = {
            'chapter_id':chapter_id,
            'comic_id':COMIC_ID,
            'format':'1',
            'quality':'1',
            'sign':'fb2b70ea70a1c8a832a4cb7aa68527d4',
            'type':'1',
            'uid':'79576403'
        }

        # Fetch the image list of this chapter as JSON.
        json_data = fetch(url, params).json()
        print(json_data)
        pages = json_data['data']['page']
        print(pages)

        for page in pages:
            img_url = page['image']
            # Download the raw image bytes and write them to disk.
            img_content = fetch(img_url).content
            with open(image_path(file, title, num), 'wb') as f:
                f.write(img_content)
            print(img_url)
            num += 1


if __name__ == '__main__':
    main()
Markdown 编辑器