import requests
from requests_html import HTMLSession, HTML
from datetime import timedelta
import json
import datetime

# Fetch the Weitoutiao (micro-headline) posts of the "欧洲足球时评"
# (European football commentary) Toutiao account.

# Paging cursor sent to the feed API. '0' requests the first (newest) page;
# getWeiToutiaoInfo() overwrites it with the cursor returned by each response.
max_behot_time = '0'

# Seconds to wait for the feed API before giving up on a request.
_REQUEST_TIMEOUT_SECONDS = 30


def getWeiToutiaoInfo():
    """Print the content of every Weitoutiao post published last week.

    Pages through the user feed starting from the module-level
    ``max_behot_time`` cursor. Posts published after the end of the
    last-week window are skipped; paging stops at the first post published
    before the start of the window, or when the API returns an empty page or
    a cursor that does not advance.

    Side effects:
        Updates the module-level ``max_behot_time`` with the cursor returned
        by each response, and prints each matching post's content.

    Returns:
        None.
    """
    global max_behot_time

    # Compute the window once per call so every page is filtered against the
    # same bounds (and the diagnostic prints appear only once).
    lastWeekStartTime, lastWeekEndTime = getLastWeekFirstDayTimeStamp()
    window_start = int(lastWeekStartTime)
    window_end = int(lastWeekEndTime)

    with HTMLSession() as session:
        # Iterate page by page instead of recursing, so a long feed cannot
        # exhaust the interpreter's recursion limit.
        while True:
            requested_cursor = max_behot_time
            # NOTE(review): verify=False disables TLS certificate
            # verification. Kept for backward compatibility; confirm whether
            # it is really needed and drop it if not.
            response = session.get(
                "https://www.toutiao.com/api/pc/list/user/feed"
                "?category=pc_profile_ugc"
                "&token=MS4wLjABAAAA7lHc4sBPuZaQ85qdIrwVvWm8Ps5O1kPMpuh5lTJAwII"
                f"&max_behot_time={requested_cursor}"
                "&aid=24&app_name=toutiao_web",
                verify=False,
                proxies=None,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            resultJson = json.loads(response.text)

            # Record the cursor for the next page before processing the
            # posts, so the global is advanced even if we return early below.
            next_cursor = (resultJson.get('next') or {}).get('max_behot_time')
            if next_cursor is not None:
                max_behot_time = next_cursor

            posts = resultJson.get('data') or []
            for post in posts:
                publish_time = int(post["publish_time"])
                if publish_time > window_end:
                    # Newer than the window: keep scanning towards older posts.
                    continue
                if publish_time < window_start:
                    # Older than the window: nothing further can match.
                    return
                content = str(post['content'])
                print(content)
                # TODO: skip score-prediction posts and tally posts per
                # author (the author signature is expected in the last few
                # characters of the content); the earlier draft of this
                # logic was never enabled.

            # Stop when there is nothing more to page through; otherwise the
            # same request would be repeated forever.
            if (
                not posts
                or next_cursor is None
                or str(next_cursor) == str(requested_cursor)
            ):
                return


def getLastWeekFirstDayTimeStamp():
    """Return the POSIX timestamps bounding the previous calendar week.

    The window starts at 00:00 on Monday of last week and ends exactly seven
    days later (00:00 on Monday of the current week), both computed from the
    naive local time returned by ``datetime.datetime.now()``.

    Side effects:
        Prints the two bounds, first as datetimes and then as timestamps.

    Returns:
        A ``(start_timestamp, end_timestamp)`` tuple of floats.
    """
    now = datetime.datetime.now()
    today_midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
    # weekday() is 0 on Monday, so this lands on Monday of the previous week.
    last_week_start = today_midnight - timedelta(days=now.weekday() + 7)
    last_week_end = last_week_start + timedelta(days=7)
    print(f"上周第一天的日期是:{last_week_start},最后一天的日期是:{last_week_end}")
    print(f"上周第一天的时间戳是:{last_week_start.timestamp()},最后一天的时间戳是:{last_week_end.timestamp()}")
    return last_week_start.timestamp(), last_week_end.timestamp()


if __name__ == '__main__':
    getWeiToutiaoInfo()