How can I transcribe large audio/video files without getting a TimeoutError?

300 Views Asked by At

The code:

Imports:

from deepgram import Deepgram
import asyncio
import os
# Placeholder API key handed to the Deepgram client in main(); replace with a real key.
DEEPGRAM_API_KEY="obviouslynotmyapikey"

async def get_audio_files():
   """Transcribe every regular file in the hard-coded speeches directory.

   Each directory entry is joined with the directory path; entries that are
   regular files are passed to ``main`` one at a time (each transcription is
   awaited before the next one starts).

   Returns:
       The path of the last directory entry examined (whether or not it was
       a regular file), or ``None`` when the directory is empty.
   """
   path_of_the_speeches = 'C:/Users/user/Documents/vid_files'
   # Bind the name up front so an empty directory returns None instead of
   # raising UnboundLocalError at the final ``return``.
   audio_file = None
   for filename in os.listdir(path_of_the_speeches):
       audio_file = os.path.join(path_of_the_speeches, filename)
       # Skip sub-directories and other non-file entries.
       if os.path.isfile(audio_file):
           await main(audio_file)
   return audio_file

async def main(file):
   """Transcribe one audio/video file with Deepgram and print the transcript.

   Prints the file path, opens the file in binary mode, sends it to
   ``deepgram.transcription.prerecorded`` with the options below, and prints
   the paragraph-formatted transcript of the first alternative of the first
   channel.

   Args:
       file: Path of the media file to transcribe.

   NOTE: for large files the prerecorded request has been observed to raise
   ``asyncio.TimeoutError``.
   """
   print(f"Speech Name: {file}")

   deepgram = Deepgram(DEEPGRAM_API_KEY)

   # Open the audio file
   with open(file, 'rb') as audio:
       # ...or replace mimetype as appropriate
       source = {'buffer': audio, 'mimetype': 'audio/mp4'}
       transcription_options = {
           "model": "nova",
           "smart_format": True,
           "punctuate": True,
           "paragraphs": True,
           "diarize": True,
       }

       # If somehow I would be able to raise the timeout limit it would work
       # The request must stay inside the ``with`` block: the open file handle
       # is the upload buffer.
       response = await deepgram.transcription.prerecorded(source, transcription_options)
       transcript = response['results']['channels'][0]['alternatives'][0]['paragraphs']['transcript']
       print(transcript)

# Switch asyncio to the selector-based event loop policy (this policy class is
# only provided on Windows), then run get_audio_files() to completion.
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(get_audio_files())

I also tried calling the API directly with aiohttp, with the total timeout disabled, but that is not working either:

url = 'https://api.deepgram.com/v1/listen?model=general&tier=nova&version=latest&punctuate=true&diarize=true&smart_format=true&paragraphs=true'
            timeout = aiohttp.ClientTimeout(total=None)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(url, data=source) as response:
                    response_data = await response.json()

                    print(json.dumps(response_data, indent=4))

I know that it can take 15 minutes or more until I get a response from Deepgram's API, but there should be a way to avoid this error:

raise asyncio.TimeoutError from None
asyncio.exceptions.TimeoutError

0

There are 0 best solutions below