Tesseract opens a console window when the app is packaged with PyInstaller

27 Views Asked by At

I have a program that performs OCR using Tesseract and PyQt5.

thread.py

class SearchThread(QThread):
    """Background worker that searches the PDFs of a folder for a query.

    ``signal`` is emitted with the path of every PDF whose ``search`` call
    returns a truthy value, or with an ``'Error : ...'`` message when the
    search raises. ``finished`` is emitted once the search is over, whether
    it completed or failed.
    """

    # Carries either the path of a matching PDF or an error message.
    signal = pyqtSignal(str)
    # NOTE: kept under this name for existing connections; it shadows the
    # built-in QThread.finished signal.
    finished = pyqtSignal()

    def __init__(self, data):
        """Store the search parameters.

        Args:
            data: Mapping that provides the ``'folder'`` and ``'query'`` keys.
        """
        # QThread already derives from QObject, so a single cooperative
        # super() call initialises the whole Qt base exactly once.
        super().__init__()
        self.data = data

    def search(self):
        """Search every PDF under the folder and report each match.

        Emits ``signal`` for each matching PDF path and ``finished`` after
        the last file has been checked.
        """
        pdf_files = PDF.get_pdf_files(self.data['folder'])
        for pdf in pdf_files:
            if pdf.search(self.data['query']):
                self.signal.emit(pdf.pdf_path)
        self.finished.emit()

    def run(self):
        """Thread entry point: run the search and report any failure.

        An exception is reported through ``signal`` instead of being
        re-raised, so the error message actually reaches the listener.
        """
        try:
            self.search()
        except Exception as e:
            self.signal.emit(f'Error : {str(e)}')
            # search() did not reach its own finished.emit(); tell listeners
            # the work is over so they can reset their state.
            self.finished.emit()

util.py

class PDF:
    """A PDF file on disk whose pages can be searched through OCR."""

    def __init__(self, pdf_path):
        """Remember the location of the PDF.

        Args:
            pdf_path: Path of the PDF file.
        """
        self.pdf_path = pdf_path

    def search(self, query: str) -> bool:
        """Return True if any whitespace-separated keyword occurs in the PDF.

        Each page is rendered to an image with ``pdf2image`` and read with
        ``pytesseract`` (language ``'ara'``); the search stops at the first
        page that contains a keyword.

        Args:
            query: Keywords separated by whitespace.

        Returns:
            True when at least one keyword is found in the text of some
            page, False otherwise (including when the query has no keywords).
        """
        keywords = query.split()
        if not keywords:
            # Nothing can match, so skip the expensive conversion and OCR.
            return False

        pytesseract.pytesseract.tesseract_cmd = OCR_EXEC_PATH
        pytesseract_config = f'--tessdata-dir "{OCR_DATA_PATH}"'
        for image in pdf2image.convert_from_path(self.pdf_path):
            text = pytesseract.image_to_string(image, lang='ara', config=pytesseract_config)
            if any(keyword in text for keyword in keywords):
                return True
        return False

    @staticmethod
    def get_pdf_files(folder) -> list:
        """Collect every ``*.pdf`` file under *folder*, recursively.

        Args:
            folder: Directory to scan.

        Returns:
            A list of ``PDF`` objects holding resolved path strings.
        """
        path = pathlib.Path(folder)
        return [PDF(str(file.resolve())) for file in path.glob('**/*.pdf')]

ui.py

class UI(QMainWindow):

    def __init__(self):
        super(UI, self).__init__()
        self.search_thread = None
        try:
            import os, sys
            os.chdir(sys._MEIPASS)
        except:
            pass
        loadUi(DESIGNER_FILE, self)
        ....
        self.show()      
            
    def on_search_btn_click(self):
        if self.search_thread and self.search_thread.isRunning():
            return           
        else:
            data = {'query': self.query.text().lower().strip(), 'folder': self.folder.text()}
            self.search_thread = SearchThread(data)
            self.search_thread.signal.connect(self.on_signal_received)
            self.search_thread.finished.connect(self.on_finished)
            self.search_thread.start()
            
    def on_signal_received(self, value):
        self.add_item(value)

    def on_finished(self, value):
        self.cancel_btn.setEnabled(False)

main.py

if __name__ == '__main__':
    import sys

    app = QApplication(argv)
    # Keep a reference so the window is not garbage-collected while the
    # event loop runs.
    window = UI()
    # Propagate the event loop's return code as the process exit status.
    sys.exit(app.exec_())

build.bat

REM Build main.py into a single-file executable named ArchiveSearch, without a
REM console window, written to the current directory, bundling the "data"
REM folder under the same name.
pyinstaller --noconsole --onefile --name ArchiveSearch --distpath . --add-data="data:data" main.py

Every time a PDF is processed, a console window opens and closes in less than a second when the program runs as an exe. When it is run directly from Python, it works fine.

0

There are 0 best solutions below