# urllib.request:模拟浏览器发送 GET 请求,往请求头里面添加参数
import urllib.request
# Send a plain GET request (no custom headers) and keep the response object.
response = urllib.request.urlopen(url="http://www.baidu.com")
import urllib.request
# Page to fetch; this snippet used `url` without ever defining it.
url = "http://www.baidu.com"

# `self` does not exist at module level (it would raise NameError), so the
# request headers are kept in a plain variable instead of `self.heads`.
heads = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.100 Safari/537.36",
}

# Put the UA in the request so it looks more like a browser request.
request = urllib.request.Request(url=url, headers=heads)
content = urllib.request.urlopen(request)
# urllib.parse:URL 的解析、合并、编码、解码
# Encode a dict of query parameters into a percent-encoded query string and
# append it to the search URL.
kw = dict(kw='帅哥')
result = urllib.parse.urlencode(kw)
url = ''.join(('https://tieba.baidu.com/f?', result))

# Percent-encode a single string.
kw = '帅哥'
result = urllib.parse.quote(kw)
print(result)

# Decode the percent-encoded text again; the return value is not stored.
urllib.parse.unquote(result)
# Resolve a relative link against a base URL. `suburl` has no leading slash,
# so it replaces only the last path segment of `baseurl`; the joined URL is
# https://book.qidian.com/info/info/1018368514 and the base's "#Catalog"
# fragment is not carried over.
baseurl, suburl = (
    'https://book.qidian.com/info/1017777013#Catalog',
    'info/1018368514',
)
result = urllib.parse.urljoin(base=baseurl, url=suburl)
print(result)
import urllib.parse
# Split a URL into its six components. With allow_fragments=False the
# "#Catalog" part is not separated out: it stays inside `path` and the
# `fragment` field is empty.
url = 'https://book.qidian.com/info/1017777013#Catalog'
result = urllib.parse.urlparse(url, allow_fragments=False)
# urlunparse needs an iterable of exactly six components (not seven), in the
# order shown below.
url = (
    'https',             # scheme
    'book.qidian.com',   # netloc
    '/info/1017777013',  # path
    '',                  # params
    '',                  # query
    'Catalog',           # fragment
)
result = urllib.parse.urlunparse(url)