流程

代码

+ python代码
""" python 采集微博评论"""
import csv

import requests
from datetime import datetime

# 爬取目标网址: https://weibo.com/1195242865/Q28CwnQyw

# Output file for the scraped comments.
#   * 'utf-8-sig' prefixes the file with a UTF-8 byte-order mark.
#   * newline='' leaves line-ending handling to the csv module, as its
#     documentation recommends for files passed to a writer.
f = open('data.csv', 'w', encoding='utf-8-sig', newline='')
csv_writer = csv.DictWriter(
    f,
    fieldnames=['日期', '昵称', '性别', '地区', '当前位置', '评论', '描述'],
)
# Emit the header row before any comment rows are written.
csv_writer.writeheader()
# Request headers that make the call look like it comes from a browser.
# NOTE(review): the session cookie below is hard-coded in the source;
# consider loading it from an environment variable or a config file.
headers = {
    # Session cookie sent with the request (the site uses it to check
    # whether an account is logged in).
    'cookie': (
        'SUB=_2AkMfsEW7f8NxqwFRmvwWzm7nZIV0yArEieKp7LRgJRMxHRl-yT9xqk0vtRB6NDBrVCarYZjlu_nRNVG5dwvVzEhS-y4r; '
        'SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9W5hk-.OY0fp2WOjXH8T5_CQ; '
        'XSRF-TOKEN=sfi19qFE_ZpLEQmzlbQJdxiJ; '
        'WBPSESS=cbjWrs_UvapT2Pg_hlK_fEjSg_LlZXmo9fnpnvcVnV4awhKssG_jwe_dwh5Cc3Y12ssa2ewzdQ75MzVIPzmQK4SNCQWkNm0aC08ei9RFLqy9covtSs9kLoWQzWq8wuJfIhZ47K1lSIbuDVl9JF3jMmvOh-_ANGhPI0Roylm6lt8='
    ),
    # Basic identity information about the browser and device.
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/141.0.0.0 Safari/537.36 Edg/141.0.0.0'
    ),
    # Anti-hotlinking: when a browser requests a resource it attaches a
    # Referer header naming the page the request came from, and the server
    # can inspect it to decide whether the request has a legitimate origin.
    'referer': 'https://weibo.com/1195242865/Q28CwnQyw',
}

# Endpoint that serves the comment data.
url = 'https://weibo.com/ajax/statuses/buildComments'

# Query-string payload, i.e. the parameters that follow "?" in the endpoint URL.
params = dict(
    is_reload='1',
    id='5205097965683572',
    is_show_bulletin='2',
    is_mix='0',
    count='10',
    uid='1195242865',
    fetch_level='0',
    locale='en-US',
)

# Fetch the comments, turn each one into a CSV row, and write it out.
#
# The timeout keeps the script from blocking indefinitely when the server
# does not answer; raise_for_status() reports an HTTP error directly instead
# of letting it surface later as a JSON-decoding or KeyError failure.
response = requests.get(url=url, headers=headers, params=params, timeout=10)
response.raise_for_status()
json_data = response.json()
comment_list = json_data['data']
comment_info = []

# Layout of the `created_at` field expected by strptime, i.e. strings shaped
# like "Mon Jan 01 12:00:00 +0800 2024". Defined once, outside the loop.
format_str = '%a %b %d %H:%M:%S %z %Y'
# Gender code from the API -> label stored in the CSV; any other code is
# recorded as '未知'.
gender_labels = {'f': '女', 'm': '男'}

try:
    for index in comment_list:
        user = index['user']
        dt = datetime.strptime(index['created_at'], format_str)
        dit = {
            '日期': dt.strftime('%Y/%m/%d %H:%M:%S'),
            '昵称': user['screen_name'],
            '性别': gender_labels.get(user['gender'], '未知'),
            '地区': index['source'],
            '当前位置': user['location'],
            '评论': index['text'],
            '描述': user['description'],
        }
        print(dit)
        comment_info.append(dit)
        # Write each row exactly once. Re-writing the whole accumulated list
        # on every iteration would duplicate all earlier rows in the file.
        csv_writer.writerow(dit)
finally:
    # Flush and release the CSV handle even if a comment fails to parse.
    f.close()