# hw_13
# Fetch a web page and save it to disk as an HTML file.
import requests

url = 'http://www.ygdy8.com/'

# Browser-like request headers sent with the GET request.
# NOTE: the conditional-request headers 'If-Modified-Since' and
# 'If-None-Match' are deliberately not sent. They carry validators from an
# old browser session, and a server that honours them may reply
# "304 Not Modified" with an empty body, leaving nothing to save.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'DNT': '1',
    'Host': 'www.ygdy8.com',
    'Referer': 'https://www.google.com/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
}

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; fail after 30 seconds instead.
req = requests.get(url, headers=headers, timeout=30)
status_code = req.status_code
print(status_code)

# Force the decoding used by req.text instead of relying on the encoding
# requests infers from the response headers.
# NOTE(review): presumably the site serves GB2312-encoded pages - confirm.
req.encoding = 'gb2312'
html = req.text
print(html)

# Write the fetched page into the target folder as an HTML file.
# The 'with' block guarantees the file is flushed and closed, even if the
# write raises.
with open(r'C:\Users\haoran\Desktop\Python study\dy.html', 'w', encoding='utf-8') as fp:
    fp.write(html)