import requests
import json
import re
# Request headers sent with every feed request: a mobile Chrome User-Agent
# plus a browser cookie string.
# NOTE(review): the cookie appears to embed logged-in session credentials --
# consider loading it from the environment instead of committing it.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.25 Mobile Safari/537.36'
    ),
    # One cookie pair per line; adjacent literals are concatenated into a
    # single '; '-separated header value.
    'cookie': (
        'SSOLoginState=1560744529; '
        'ALF=1563336529; '
        'SCF=AvTBvMMmz0oZngGtGSQ33rN-nryQ4Lw-Q9ZgXAHOU7FKal63f2BX601Mw8qrS1TwAyGD7MSpzamSAeaYvhnUfmo.; '
        'SUB=_2A25wA2IBDeRhGeRP41US-CzKyjyIHXVTDA5JrDV6PUNbktAKLRfhkW1NU-rmoTDAZi6xmOJKcVSCgl64cCl4ftoR; '
        'SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW8judAe5n2NqdgSI.PKq9S5JpX5KMhUgL.Fozp1hM01hzceK52dJLoIEnLxKMLBK.LB.qLxK.L1h5L12qLxKML1heL1-qLxK.L1-zLB.HjIgp4; '
        'SUHB=0tKbOEMSFuEDBK; '
        'MLOGIN=1; '
        '_T_WM=62613902799; '
        'WEIBOCN_FROM=1110005030; '
        'XSRF-TOKEN=53f153; '
        'M_WEIBOCN_PARAMS=lfid%3D1005052187382610%26luicode%3D20000174%26uicode%3D20000174'
    ),
}

# Group-feed endpoint; it ends with an empty ``max_id`` query parameter.
url = 'https://m.weibo.cn/feed/group?gid=4218621758638928&max_id='
def get_info(url, no, max_pages=5):
    """Print the tag-stripped text of every status in a paginated feed.

    Fetches ``url``, prints each status' ``text`` with ``<...>`` markup
    removed, then follows the ``max_id`` cursor returned in the payload to
    the next page until the page counter exceeds ``max_pages``.

    Args:
        url: URL of the first page to fetch.
        no: Counter of the page being fetched; it is incremented after
            each page and compared against ``max_pages``.
        max_pages: Paging stops once the incremented counter exceeds this
            value (defaults to 5).

    Raises:
        KeyError: If the response JSON lacks ``data``, ``statuses``,
            ``text`` or (when another page is needed) ``max_id``.
        ValueError: If the response body is not valid JSON.
    """
    # ``flags`` must be given explicitly: the 4th positional argument of
    # ``re.sub`` is ``count``, so passing ``re.S`` there would cap the
    # number of replacements at 16 and never enable DOTALL.
    tag_pattern = re.compile(r'<(.*?)>', flags=re.S)

    # Iterate instead of recursing once per page.
    while True:
        # A timeout keeps a stalled connection from hanging the script.
        res = requests.get(url, headers=headers, timeout=10)
        data = json.loads(res.text)['data']

        for status in data['statuses']:
            print(tag_pattern.sub('', status['text']))

        no += 1
        if no > max_pages:
            break

        # NOTE(review): the next-page URL is pinned to this group id
        # regardless of the ``url`` argument that was passed in.
        url = 'https://m.weibo.cn/feed/group?gid=4218621758638928&max_id={}'.format(
            data['max_id']
        )
# Script entry point: start from the base feed URL with the page counter at 1.
get_info(url, no=1)