爬取漫客栈漫画
流程
代码
爬取漫客栈漫画
代码示例
import requests
import os
# Output directory for the downloaded images. The value ends with the
# platform's path separator ('img\\' on Windows, 'img/' elsewhere) so a file
# name can be appended to it directly; a hard-coded backslash is only a
# separator on Windows.
file = os.path.join('img', '')
# Running counter appended to every saved image name to keep names unique.
num = 1
# Create the output directory; exist_ok=True makes this a no-op when the
# directory is already there and avoids the check-then-create race.
os.makedirs(file, exist_ok=True)

# Scrape the comic hosted on mkzhan.com (漫客栈).
# Reader page: https://www.mkzhan.com/212062/932940.html
# Send a desktop-browser user agent with every request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
}
# Chapter-list endpoint for the comic with ID 212062.
link = 'https://comic.mkzcdn.com/chapter/v1/?comic_id=212062'
# Request the chapter list. The timeout keeps the script from hanging forever
# on a stalled connection, and raise_for_status() turns an HTTP error into an
# exception instead of trying to parse an error page as JSON.
link_response = requests.get(url=link, headers=headers, timeout=10)
link_response.raise_for_status()
link_json = link_response.json()
print(link_json)
# The 'data' entry is iterated later, one item per chapter.
datas = link_json['data']
# Translation table replacing the characters Windows forbids in file names
# (\ / : * ? " < > |) with "_", so a chapter title cannot break the save path.
_filename_table = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Chapter-content endpoint (its response carries the page images of one
# chapter). It does not change between iterations, so it is defined once.
url = 'https://comic.mkzcdn.com/chapter/content/v1/'

# NOTE(review): the indentation of this loop was missing in the source and has
# been reconstructed from the data flow (one content request per chapter, one
# download per page, counter bumped after every saved image) - confirm.
# A single Session reuses connections across the many requests made below.
with requests.Session() as session:
    session.headers.update(headers)
    for index in datas:
        # Chapter ID, needed to query the chapter's content.
        chapter_id = index['chapter_id']
        # Chapter title; also used as the prefix of the saved file names.
        title = index['title']
        print(title)
        # Query parameters.
        # NOTE(review): 'sign' and 'uid' are hard-coded; presumably copied
        # from a captured browser request - confirm they stay valid.
        params = {
            'chapter_id': chapter_id,
            'comic_id': '212062',
            'format': '1',
            'quality': '1',
            'sign': 'fb2b70ea70a1c8a832a4cb7aa68527d4',
            'type': '1',
            'uid': '79576403',
        }
        # Fetch the chapter content; fail loudly on HTTP errors or timeouts.
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        json_data = response.json()
        print(json_data)
        pages = json_data['data']['page']
        print(pages)
        # Sanitise once per chapter rather than once per page.
        safe_title = title.translate(_filename_table)
        for page in pages:
            img_url = page['image']
            # Download the image bytes. raise_for_status() prevents an HTTP
            # error page from being written to disk as a ".jpg".
            img_response = session.get(img_url, timeout=10)
            img_response.raise_for_status()
            img_content = img_response.content
            # Save the image as <title><counter>.jpg inside the output folder.
            img_path = os.path.join(file, safe_title + str(num) + '.jpg')
            with open(img_path, 'wb') as f:
                f.write(img_content)
            print(img_url)
            num += 1