Rendering Tesseract.js logger output in a React component slows the browser down

532 Views Asked by At

I would like to add a progress indicator based on Tesseract.js logging. The example in the docs works just fine until I call a state setter (from a `useState` hook) inside the logger:

// Create the Tesseract worker. The logger callback forwards every message `m`
// it receives straight to the `setProgress` state setter.
const worker = createWorker({
  logger: (m) => {
    setProgress(m) //new
}});

...

// `ocr` starts out as the 'Recognizing...' placeholder string.
const [ocr, setOcr] = useState('Recognizing...');
// `progress` starts as null and is later passed to <LogComponent> as a prop.
const [progress, setProgress] = useState(null); //new

...

// Render the current `progress` state through LogComponent.
return (
  <div className="App">
    <p>           
      <LogComponent progress={progress}/> //new
    </p>
  </div>);

This causes the browser to slow down significantly (probably due to the way React re-renders on each state update). Is there a way to get around this? Using React.memo, perhaps?

2

There are 2 best solutions below

0
On

I managed to implement and render the progress of the tesseract worker by putting my application render inside a class and using the setState method:

class App extends React.Component {
  constructor(props){
    super(props)
    this.state = {
      file: null
    }
    this.handleChange = this.handleChange.bind(this)
  }

  setProgress(m) {


    if (m.progress !== 0 && m.progress !== 0.5 && m.progress !== 1){
 
     var prog = "Progress: " + Math.round(m.progress*100) + "%"
     this.setState({progress: prog})
    }
   }
 
   worker = createWorker({
     logger: m => this.setProgress(m),
   });



  doOCR = async () => {
    await this.worker.load();
    await this.worker.loadLanguage('eng');
    await this.worker.initialize('eng');
    const { data: { text } } = await this.worker.recognize(this.state.file);
    this.setState({text: extractTotal(text),
                   progress: ""});

  };

  handleChange(event) {
    
    this.setState({text: placeholder});
    this.setState({
      file: URL.createObjectURL(event.target.files[0]),
      
    })

    this.doOCR()

  }

  setText(input){

    if (!input) {
      return "Please select a receipt"   
    } 
    else { 
      return input
    }

  }

  render() {
    console.log("Text: " + this.state.text) 
    return (
      <div className="container">
        <p>{this.setText(this.state.text)}</p>
        <p>{this.state.progress}</p>
        <input type="file" onChange={this.handleChange}/>
        <img src={this.state.file} className='logo' alt=""/>
        
      </div>
    );
  }
}
0
On

You could design it into a hook so your entire component doesn't re-render. Here is a useTesseract hook you can use that I created: https://gist.github.com/KevinDanikowski/25cdcdda2ef4750bcf443f2027cc375a

Copied and pasted:

import { useState, useEffect } from 'react'
import { createWorker } from 'tesseract.js'

/**
 * React hook that owns a Tesseract.js worker and exposes OCR state.
 *
 * @param {object} [options]
 * @param {string} [options.tesseractLanguage='eng'] - language model to load;
 *   changing it re-initialises the existing worker with the new language
 * @param {boolean} [options.log=false] - when true, every worker log message
 *   is also written with console.info
 * @returns {{
 *   imgResults: object,
 *   loadingModel: boolean,
 *   processing: boolean,
 *   modelError: boolean,
 *   progress: number,
 *   extractTextFromImage: function(string): void
 * }} imgResults is `{ html, text }` after a successful recognition and
 *   `{ error: true }` after a failed one.
 */
export default function useTesseract({ tesseractLanguage = 'eng', log = false } = {}) {
  const [tesseractWorker, setTesseractWorker] = useState(null)
  const [loadingModel, setLoadingModel] = useState(true)
  const [modelError, setModelError] = useState(false)
  const [imgResults, setImgResults] = useState({})
  const [processing, setProcessing] = useState(false)
  const [progress, setProgress] = useState(0)

  const extractTextFromImage = (imageUrl) => {
    // Errors are handled inside the async function itself: a try/catch around
    // a non-awaited call (or around setTimeout) never sees a promise rejection.
    const recognize = async () => {
      try {
        const {
          data: {
            hocr: htmlOutput,
            text,
            // tsv, box, unlv
          },
        } = await tesseractWorker.recognize(imageUrl)
        setProcessing(false)
        setImgResults({ html: htmlOutput, text })
      } catch (e) {
        console.error('Tesseract Error:', e.message)
        setProcessing(false)
        setImgResults({ error: true })
      }
    }
    if (loadingModel) {
      // Model still loading: give it a short moment before trying.
      setTimeout(recognize, 400)
    } else {
      setProcessing(true)
      recognize()
    }
  }

  const logger = (m) => {
    setProgress(m.progress)
    if (log) {
      console.info(m)
    }
  }

  useEffect(() => {
    const loadTesseract = async () => {
      let worker = tesseractWorker
      if (!worker) {
        worker = createWorker({
          logger,
          // specify paths because sometimes the free CDN goes down
          // corePath: '/static/tesseract-core.wasm.2.2.0.js',
          // workerPath: '/static/tesseract-worker.v2.1.4.min.js',
        })
        setTesseractWorker(worker)
        await worker.load()
      }
      await worker.loadLanguage(tesseractLanguage)
      await worker.initialize(tesseractLanguage)
      console.info(`INFO: loaded ${tesseractLanguage} tesseract model`)
      // Success path: clear any earlier error and mark the model as ready.
      setModelError(false)
      setLoadingModel(false)
    }
    loadTesseract().catch((e) => {
      console.error(`ERROR: Failed to load tesseract model`, e.message)
      setModelError(true)
      setLoadingModel(false)
    })
    // TODO: Have to add a ref to reference the latest tesseractWorker in order to terminate
    // return () => tesseractWorker.terminate()
  }, [tesseractLanguage])

  return {
    imgResults,
    loadingModel,
    processing,
    modelError,
    progress,
    extractTextFromImage,
  }
}