Parallelism in Python using multiprocessing


I have been trying to use the multiprocessing module from Python to achieve parallelism.

I'm able to execute my code and it runs in parallel, but after some time only one process finishes its task and the others exit without finishing. I know there is a join() method that waits for all tasks to finish, but it does not seem to work properly here. I have been reading the multiprocessing manual page and forums to find out why it isn't working, and I haven't figured it out yet.
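For what it's worth, join() only waits for a child process to terminate; it neither reports nor re-raises a failure inside the child. A minimal standalone sketch (no Frappe involved, hypothetical worker function) showing that join() returns normally even when a child dies on an unhandled exception:

from multiprocessing import Process

def worker(n):
    # hypothetical task: fails for one input, succeeds for the others
    if n == 2:
        raise RuntimeError("boom")
    print(f"worker {n} done")

if __name__ == "__main__":
    procs = [Process(target=worker, args=(n,)) for n in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()  # returns even if the child raised; no exception propagates here
    print([p.exitcode for p in procs])  # e.g. [0, 0, 1, 0]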

I think the problem may be related to something else, like the database or the Python version. My Python version is 3.10 and I have an 8-core CPU. Any help?

Here is my code:

I have 5 processes (20,000 items split into batches of 4,000).

def process_batch(self, batch_index, batched_payloads):
    payloads = self.import_file.get_payloads_for_import()
    imported_rows = []
    total_payload_count = len(payloads)
    batch_size = frappe.conf.data_import_batch_size or 4000
    for i, payload in enumerate(batched_payloads):
        doc = payload.doc
        row_indexes = [row.row_number for row in payload.rows]
        current_index = (i + 1) + (batch_index * batch_size)
        if set(row_indexes).intersection(set(imported_rows)):
            print("Skipping imported rows", row_indexes)
            if total_payload_count > 5:
                frappe.publish_realtime(
                    "data_import_progress",
                    {
                        "current": current_index,
                        "total": total_payload_count,
                        "skipping": True,
                        "data_import": self.data_import.name,
                    },
                    user=frappe.session.user,
                )
            continue
        try:
            start = timeit.default_timer()
            # insert data into the database via process_doc
            doc = self.process_doc(doc)
            processing_time = timeit.default_timer() - start
            eta = self.get_eta(current_index, total_payload_count, processing_time)
            if self.console:
                update_progress_bar(
                    f"Importing {total_payload_count} records",
                    current_index,
                    total_payload_count,
                )
            elif total_payload_count > 5:
                frappe.publish_realtime(
                    "data_import_progress",
                    {
                        "current": current_index,
                        "total": total_payload_count,
                        "docname": doc.name,
                        "data_import": self.data_import.name,
                        "success": True,
                        "row_indexes": row_indexes,
                        "eta": eta,
                    },
                    user=frappe.session.user,
                )
            # commit after every successful import
            frappe.db.commit()
        except Exception:
            # rollback if an exception occurs (the error itself is discarded)
            frappe.db.rollback()
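One thing that may be hiding the real failure: the bare except rolls back and then drops the error, so a worker that hits repeated exceptions looks like it "exits without finishing". A sketch of the same handler with the traceback logged (stdlib only, no change to the rollback behaviour; batch_index and row_indexes are the variables already in scope above):

import traceback

try:
    doc = self.process_doc(doc)
    frappe.db.commit()
except Exception:
    frappe.db.rollback()
    # print the full traceback so failures inside worker processes are visible
    print(f"Batch {batch_index}, rows {row_indexes} failed:")
    traceback.print_exc()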

Here is my code for multiprocessing:

def import_data(self):
    batch_size = frappe.conf.data_import_batch_size or 4000

    workers = []
    for batch_index, batched_payloads in enumerate(
        frappe.utils.create_batch(payloads, batch_size)
    ):
        p = Process(target=self.process_batch, args=(batch_index, batched_payloads))
        p.start()
        workers.append(p)
    for worker in workers:
        worker.join()
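Since join() returns whether or not the child succeeded, inspecting each worker's exitcode right after joining would at least show whether the children are dying early. A small diagnostic sketch against the workers list above (0 means a clean exit, a positive code means an unhandled exception or sys.exit(n), a negative code means the process was killed by a signal, e.g. the OOM killer):

for worker in workers:
    worker.join()
    if worker.exitcode != 0:
        print(f"worker {worker.pid} died with exitcode {worker.exitcode}")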

The same thing happens if I do:

processes_count = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes_count)
# pool = multiprocessing.Pool(4)
for batch_index, batched_payloads in enumerate(
    frappe.utils.create_batch(payloads, batch_size)
):
    pool.apply_async(self.process_batch, args=(batch_index, batched_payloads))
pool.close()
pool.join()
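Note that apply_async returns an AsyncResult, and an exception raised in the worker is only re-raised when .get() is called on it; as written, failures are silently discarded. A sketch that collects the results so errors surface in the parent (reusing the same variables as above):

results = [
    pool.apply_async(self.process_batch, args=(batch_index, batched_payloads))
    for batch_index, batched_payloads in enumerate(
        frappe.utils.create_batch(payloads, batch_size)
    )
]
pool.close()
for r in results:
    r.get()  # re-raises any exception that occurred in the worker
pool.join()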

Edit:

Same thing with concurrent.futures.ProcessPoolExecutor:

batches = enumerate(frappe.utils.create_batch(payloads, batch_size))
with concurrent.futures.ProcessPoolExecutor(max_workers=processes_count) as executor:
    executor.map(self.process_batch, batches)
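As an aside, executor.map with a single iterable passes each (batch_index, batch) tuple to process_batch as one positional argument, so every call would raise a TypeError; and because the returned iterator is never consumed, that error is never re-raised in the parent. A sketch that unpacks the pairs and drains the iterator (same variables as above):

indexes, payload_batches = zip(*enumerate(frappe.utils.create_batch(payloads, batch_size)))
with concurrent.futures.ProcessPoolExecutor(max_workers=processes_count) as executor:
    # two iterables -> two positional arguments per call; list() surfaces exceptions
    list(executor.map(self.process_batch, indexes, payload_batches))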

Same thing with concurrent.futures.ProcessPoolExecutor.submit():

with concurrent.futures.ProcessPoolExecutor(max_workers=processes_count) as executor:
    future_to_batch = [
        executor.submit(self.process_batch, batch_index, batched_payloads)
        for batch_index, batched_payloads in enumerate(
            frappe.utils.create_batch(payloads, batch_size)
        )
    ]
    for future in concurrent.futures.as_completed(future_to_batch):
        future.result()
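Since future.result() does re-raise worker exceptions, wrapping it in try/except would at least show which batch fails and why (a small diagnostic sketch, assuming the submit loop above):

for future in concurrent.futures.as_completed(future_to_batch):
    try:
        future.result()
    except Exception as exc:
        # a failed batch shows up here instead of vanishing silently
        print(f"a batch failed: {exc!r}")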