How to use python Pool to save data in dict?

18 Views Asked by At

I use code as below to download file parallelly:

def download_ts_file(seq_and_ts_url: tuple[int, str], store_dir: str, attempts: int = 10):
    seq, ts_url = seq_and_ts_url
    ts_dir = os.path.join(store_dir, f'{seq+1:04}.ts')

    if os.path.isfile(ts_dir):
        return
    
    ts_fname = ts_url.split('/')[-1]
    ts_res = None

    for _ in range(attempts):
        try:
            ts_res = requests.get(ts_url, headers=header)
            if ts_res.status_code == 200:
                break
        except Exception:
            pass
        time.sleep(.5)

    if isinstance(ts_res, Response) and ts_res.status_code == 200:
        with open(ts_dir, 'wb+') as f:
            f.write(ts_res.content)
    else:
        print(f"Failed to download streaming file: {ts_fname}.")

with get_context("spawn").Pool(MAX_POOLS) as pool:
    gen = pool.imap_unordered(partial(download_ts_file, store_dir='.'), enumerate(ts_url_list))
    for _ in tqdm.tqdm(gen, total=len(ts_url_list)):
        pass
    pool.close()
    pool.join()
    time.sleep(1)

I want to know how can I save result to a dict with Pool? something like as below, but it seems not work, what can I modify?

def get_data(driver, videoId, id):
    url = 'https://video.tv/{}-{}.html'.format(videoId, id)
    driver.get(url)

    url = myUtils.findElementsByXpath(driver, "//div[@class='url']") or ""
    if url:
        url = url.text
    return id, url

urls = {}
driver = myUtils.getFirefoxDriver()
with get_context("spawn").Pool(10) as pool:
    for id, url in pool.imap_unordered(get_data, driver, videoId, range(1, 100)):
        if url:
            print("{id}: {url}".format(id, url))
            urls[id] = url
if urls:
    saveDict(urls)
0

There are 0 best solutions below