I use code as below to download file parallelly:
def download_ts_file(seq_and_ts_url: tuple[int, str], store_dir: str, attempts: int = 10):
seq, ts_url = seq_and_ts_url
ts_dir = os.path.join(store_dir, f'{seq+1:04}.ts')
if os.path.isfile(ts_dir):
return
ts_fname = ts_url.split('/')[-1]
ts_res = None
for _ in range(attempts):
try:
ts_res = requests.get(ts_url, headers=header)
if ts_res.status_code == 200:
break
except Exception:
pass
time.sleep(.5)
if isinstance(ts_res, Response) and ts_res.status_code == 200:
with open(ts_dir, 'wb+') as f:
f.write(ts_res.content)
else:
print(f"Failed to download streaming file: {ts_fname}.")
with get_context("spawn").Pool(MAX_POOLS) as pool:
gen = pool.imap_unordered(partial(download_ts_file, store_dir='.'), enumerate(ts_url_list))
for _ in tqdm.tqdm(gen, total=len(ts_url_list)):
pass
pool.close()
pool.join()
time.sleep(1)
I want to know how can I save result to a dict with Pool? something like as below, but it seems not work, what can I modify?
def get_data(driver, videoId, id):
url = 'https://video.tv/{}-{}.html'.format(videoId, id)
driver.get(url)
url = myUtils.findElementsByXpath(driver, "//div[@class='url']") or ""
if url:
url = url.text
return id, url
urls = {}
driver = myUtils.getFirefoxDriver()
with get_context("spawn").Pool(10) as pool:
for id, url in pool.imap_unordered(get_data, driver, videoId, range(1, 100)):
if url:
print("{id}: {url}".format(id, url))
urls[id] = url
if urls:
saveDict(urls)