aiohttp is all about asynchronous concurrency: built on asyncio and async/await, it lets a single thread run many I/O operations concurrently.
import aiohttp
import asyncio

async def fetch(session, url):
    # Issue the GET request and read the body without blocking the event loop
    async with session.get(url) as response:
        return await response.text()

async def main():
    # One ClientSession is reused for all requests made inside it
    async with aiohttp.ClientSession() as session:
        html = await fetch(session, 'http://httpbin.org/headers')
        print(html)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
# Output:
{"headers": {"Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Connection": "close", "Host": "httpbin.org", "User-Agent": "Python/3.6 aiohttp/3.2.1"}}
# Our example does not touch aiohttp's server side.
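Since the server side is mentioned but not used above, here is only a minimal sketch of what it looks like; the route and handler names are illustrative, not part of the original example:

from aiohttp import web

async def headers(request):
    # Echo the request headers back as JSON, similar to what httpbin.org/headers does
    return web.json_response({'headers': dict(request.headers)})

app = web.Application()
app.add_routes([web.get('/headers', headers)])
# web.run_app(app)  # uncomment to serve on http://localhost:8080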
The principle is that the program does not have to wait for one operation to finish before doing something else; while the I/O is in flight, the event loop switches to other work.
(But what if you do need that operation's result before continuing? And how do you kick off a whole batch of operations at once, as mentioned earlier?)
The official documentation recommends using a ClientSession to manage the session (and its connection pool) across requests.
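Both questions above come down to awaiting coroutines: awaiting a single coroutine gives you its result, and asyncio.gather awaits a whole batch concurrently and returns the results in order. A minimal sketch reusing the fetch coroutine from above (the URL list is just an example):

import aiohttp
import asyncio

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main():
    urls = ['http://httpbin.org/headers', 'http://httpbin.org/ip', 'http://httpbin.org/get']
    async with aiohttp.ClientSession() as session:
        # All three requests are in flight at the same time; results come back in order
        pages = await asyncio.gather(*(fetch(session, url) for url in urls))
        for url, page in zip(urls, pages):
            print(url, len(page))

asyncio.get_event_loop().run_until_complete(main())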
aioredis and motor are asynchronous libraries for working with databases (Redis and MongoDB respectively).
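As a rough illustration of the same async/await style applied to a database, a motor (MongoDB) call might look like the sketch below; the database and collection names are made up for the example:

import asyncio
from motor.motor_asyncio import AsyncIOMotorClient

async def save_title(url, title):
    client = AsyncIOMotorClient('mongodb://localhost:27017')
    collection = client['crawler']['pages']   # hypothetical database/collection names
    await collection.insert_one({'url': url, 'title': title})   # non-blocking insert
    print(await collection.find_one({'url': url}))              # non-blocking query

asyncio.get_event_loop().run_until_complete(save_title('http://example.com', 'Example'))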
Scrapy is also asynchronous; it is built on the Twisted event-driven framework. Inside such an event loop you should never write blocking code (a common way to offload it is sketched after this list). Blocking code includes:
◆ 1. Accessing files, databases, or the web;
◆ 2. Spawning a new process and handling its output, such as running shell commands;
◆ 3. Performing system-level operations, such as waiting on a system queue.
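When such a blocking call cannot be avoided inside a coroutine, one common workaround is to hand it to a thread pool with run_in_executor so the event loop keeps running; a minimal sketch, with blocking_io standing in for any blocking call:

import asyncio
import time

def blocking_io():
    # Stands in for any blocking call: file access, a sync database driver, a shell command...
    time.sleep(1)
    return 'done'

async def main():
    loop = asyncio.get_event_loop()
    # None means the default ThreadPoolExecutor; other coroutines keep running meanwhile
    result = await loop.run_in_executor(None, blocking_io)
    print(result)

asyncio.get_event_loop().run_until_complete(main())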
This raises a question: if one operation must wait for the result of the previous one, is async still useful, and how do you express that dependency?
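Inside a single coroutine, dependent steps are simply awaited one after another; the concurrency comes from running several such chains at the same time. A small sketch under that assumption (the second request is a hypothetical follow-up that uses the first result):

import asyncio
import aiohttp

async def chain(session, url):
    # Step 2 depends on step 1, so the two awaits run sequentially within this coroutine
    async with session.get(url) as response:
        first = await response.text()
    async with session.get(url, params={'length': str(len(first))}) as response:
        return await response.text()

async def main():
    async with aiohttp.ClientSession() as session:
        # Independent chains still run concurrently with each other
        results = await asyncio.gather(chain(session, 'http://httpbin.org/get'),
                                       chain(session, 'http://httpbin.org/get'))
        print(len(results))

asyncio.get_event_loop().run_until_complete(main())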
Below is a longer piece of asynchronous code, a small concurrent crawler:
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import logging


class AsnycGrab(object):

    def __init__(self, url_list, max_threads):
        self.urls = url_list
        self.results = {}
        self.max_threads = max_threads

    def __parse_results(self, url, html):
        # Extract the page title and record it under its URL
        try:
            soup = BeautifulSoup(html, 'html.parser')
            title = soup.find('title').get_text()
        except Exception as e:
            raise e
        if title:
            self.results[url] = title

    async def get_body(self, url):
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=30) as response:
                assert response.status == 200
                html = await response.read()
                return response.url, html

    async def get_results(self, url):
        url, html = await self.get_body(url)
        self.__parse_results(url, html)
        return 'Completed'

    async def handle_tasks(self, task_id, work_queue):
        # Each worker keeps pulling URLs until the shared queue is drained
        while not work_queue.empty():
            current_url = await work_queue.get()
            try:
                task_status = await self.get_results(current_url)
            except Exception as e:
                logging.exception('Error for {}'.format(current_url), exc_info=True)

    def eventloop(self):
        q = asyncio.Queue()
        # Pre-load all URLs into the work queue
        for url in self.urls:
            q.put_nowait(url)
        loop = asyncio.get_event_loop()
        # Start max_threads concurrent workers that share the queue
        tasks = [self.handle_tasks(task_id, q) for task_id in range(self.max_threads)]
        loop.run_until_complete(asyncio.wait(tasks))
        loop.close()


if __name__ == '__main__':
    async_example = AsnycGrab(['http://edmundmartin.com',
                               'https://www.udemy.com',
                               'https://github.com/',
                               'https://zhangslob.github.io/',
                               'https://www.zhihu.com/'], 5)
    async_example.eventloop()
    print(async_example.results)
It uses asyncio.Queue() so that the worker coroutines can communicate through a shared work queue.
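For reference, a stripped-down producer/consumer sketch of asyncio.Queue (the names are illustrative): put/get hand items between coroutines, while task_done/join let the producer wait until every item has been processed.

import asyncio

async def producer(queue):
    for i in range(5):
        await queue.put(i)         # suspends only if the queue is full
    await queue.join()             # wait until every item has been marked done

async def consumer(queue):
    while True:
        item = await queue.get()   # suspends until an item is available
        print('processed', item)
        queue.task_done()          # tells join() this item is finished

async def main():
    queue = asyncio.Queue()
    worker = asyncio.ensure_future(consumer(queue))
    await producer(queue)
    worker.cancel()                # the consumer loops forever, so cancel it when the work is done
    try:
        await worker
    except asyncio.CancelledError:
        pass

asyncio.get_event_loop().run_until_complete(main())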