I know how to do this with the requests library: just a while True loop, and when I get an empty page or a 404 error, I break out of it. But with aiohttp I use gather, and when a page comes back empty I just cancel() all the tasks, which means I lose the tasks that have not finished yet.
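For reference, this is roughly what the requests version looks like (a sketch; get_goods_sync is just an illustrative name, and the URL and CSS class are the same placeholders as in my aiohttp code below):

import requests
from bs4 import BeautifulSoup as BS

def get_goods_sync():
    page = 1
    while True:
        r = requests.get(f'https://somewebsite?page={page}')
        if r.status_code == 404:
            break  # the site ran out of pages
        soup = BS(r.text, 'lxml')
        all_goods = soup.find_all('div', class_='js_category-list-item')
        if not all_goods:
            break  # an empty page also means the last one was passed
        for el in all_goods:
            print(el)
        page += 1

My aiohttp version: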
import asyncio
import aiohttp
from bs4 import BeautifulSoup as BS

headers = {'User-Agent': 'Mozilla/5.0'}  # placeholder headers

class SomeError(Exception):
    pass

async def get_goods_from_pages(session, page):
    url = f'https://somewebsite?page={page}'
    async with session.get(url, headers=headers) as r:
        soup = BS(await r.text(), 'lxml')
        all_goods = soup.find_all('div', class_='js_category-list-item')
        if all_goods:
            for el in all_goods:
                print(el)
        else:
            raise SomeError  # empty page: the last page was passed
# collect all the page tasks and run them together
async def get_pages_info():
    tasks = []
    async with aiohttp.ClientSession() as session:
        for page in range(1, 150):
            task = asyncio.create_task(get_goods_from_pages(session, page))
            tasks.append(task)
        try:
            group = asyncio.gather(*tasks)
            await group
        except Exception:
            # cancelling the whole group also kills the tasks that
            # have not finished their page yet, which is my problem
            group.cancel()

asyncio.run(get_pages_info())  # entry point
I also tried a while True loop where I await the function directly, but the parsing speed was very poor.
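That sequential variant looked roughly like this (a sketch reusing get_goods_from_pages and SomeError from above; the function name is just for illustration). Each page is awaited one at a time, so nothing runs concurrently:

async def get_pages_info_sequential():
    async with aiohttp.ClientSession() as session:
        page = 1
        while True:
            try:
                # the next request only starts after the previous one finished
                await get_goods_from_pages(session, page)
            except SomeError:
                break  # reached an empty page, stop requesting
            page += 1

So what I am looking for is a way to keep the concurrency of gather but stop at the first empty page without losing the tasks that are still running.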