import requests
from requests_html import HTMLSession, HTML
from datetime import timedelta
import json
import datetime
# 获取欧洲足球时评微头条内容
"""
获取微头条数据信息
"""
# Pagination cursor interpolated into the feed URL as the ``max_behot_time``
# query parameter. getWeiToutiaoInfo() overwrites it with the value found under
# ``next.max_behot_time`` in each response. It starts at '0' (presumably meaning
# "newest page" -- confirm against the API).
max_behot_time = '0'
def getWeiToutiaoInfo():
    """Print the content of every micro-headline post published last week.

    Pages through the Toutiao user-feed API, starting from the module-level
    cursor ``max_behot_time`` and advancing it with the ``next.max_behot_time``
    value of each response. For every item in ``data``:

    * items published at or after the end bound of last week are skipped;
    * items published before the start of last week stop the scan (this
      assumes the feed is ordered newest first -- TODO confirm against the API);
    * all other items have their ``content`` printed.

    Paging is done with a loop rather than recursion so that long feeds cannot
    exhaust the interpreter's recursion limit, and it stops when a page is
    empty or the cursor no longer advances.

    Side effects:
        Updates the global ``max_behot_time``; a later call therefore resumes
        from the last cursor that was received.

    Raises:
        requests.HTTPError: if the API answers with a non-success status.
        json.JSONDecodeError: if the response body is not valid JSON.
        KeyError: if a feed item lacks ``publish_time`` or ``content``.
    """
    global max_behot_time

    # Compute the bounds once for the whole scan. The second value returned by
    # getLastWeekFirstDayTimeStamp() is Monday 00:00 of the *current* week, so
    # it is treated as an exclusive upper bound below.
    lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
    week_start = int(lastWeekStartTime)
    week_end = int(lastWeekEndTime)

    # One session for all pages; the context manager closes it on every exit path.
    with HTMLSession() as session:
        while True:
            requested_cursor = max_behot_time
            # NOTE(review): verify=False disables TLS certificate verification.
            # It is kept as in the original code -- confirm it is really needed.
            response = session.get(
                "https://www.toutiao.com/api/pc/list/user/feed"
                "?category=pc_profile_ugc"
                "&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII"
                f"&max_behot_time={requested_cursor}"
                "&aid=24&app_name=toutiao_web",
                verify=False,
                proxies=None,
                timeout=30,
            )
            response.raise_for_status()
            resultJson = json.loads(response.text)

            # Store the cursor for the next page before inspecting the items,
            # so the global is advanced even when the scan stops on this page.
            next_cursor = (resultJson.get('next') or {}).get('max_behot_time')
            if next_cursor is not None:
                max_behot_time = next_cursor

            dataList = resultJson.get('data') or []
            for data in dataList:
                publishTime = int(data["publish_time"])
                if publishTime >= week_end:
                    # Published this week: too new, keep looking.
                    continue
                if publishTime < week_start:
                    # Older than last week: nothing further is of interest.
                    return
                # NOTE: skipping score-prediction posts ("比分预测】") and
                # tallying posts per author was sketched here earlier but is
                # disabled; every in-range post is simply printed.
                print(str(data['content']))

            # Stop when there is nothing more to fetch or the cursor is stuck;
            # otherwise the loop would request the same page forever.
            if (
                not dataList
                or next_cursor is None
                or str(next_cursor) == str(requested_cursor)
            ):
                return
"""
获取当前日期上一周的第一天和最后一天的时间戳
"""
def getLastWeekFirstDayTimeStamp():
    """Return the epoch timestamps that bound the previous calendar week.

    Weeks are taken to start on Monday at 00:00 in naive local time, as
    reported by ``datetime.datetime.now()``. Both bounds are also printed,
    as datetimes and as timestamps.

    Returns:
        A ``(start, end)`` tuple of floats. ``start`` is Monday 00:00 of last
        week. ``end`` is exactly seven days later, i.e. Monday 00:00 of the
        current week (the first instant *after* last week), even though the
        printed message labels it as the last day.
    """
    now = datetime.datetime.now()
    # Monday of the current week, as a plain date (time of day dropped).
    this_monday = now.date() - timedelta(days=now.weekday())
    # Midnight at the start of last week's Monday.
    last_week_start = datetime.datetime.combine(
        this_monday - timedelta(days=7), datetime.time.min
    )
    last_week_end = last_week_start + timedelta(days=7)
    print(f"上周第一天的日期是:{last_week_start},最后一天的日期是:{last_week_end}")
    print(f"上周第一天的时间戳是:{last_week_start.timestamp()},最后一天的时间戳是:{last_week_end.timestamp()}")
    return last_week_start.timestamp(), last_week_end.timestamp()
if __name__ == '__main__':
    # Script entry point: print last week's micro-headline posts.
    getWeiToutiaoInfo()