I am trying to write code to get the transcripts out of some video files I downloaded. So far I got the following:
def save_audio_transcripts(video_files):
for video_file in video_files:
# This is the code to load a local video file and parse audio
# Transform the downloaded file in "wav"
output_audio_file = os.path.splitext(video_file)[0] + ".wav"
extract_audio(video_file, output_audio_file)
#This is the code to load a YouTube Video and save the audio locally
loader = GenericLoader(
output_audio_file,
OpenAIWhisperParser()
)
#This is the code to transcribe the audio into text and save it a file
docs = loader.load()
output_directory = os.path.dirname(video_file)
output_transcript = output_directory + '/transcripts_' + os.path.splitext(os.path.basename(video_file))[0] + '.txt'
with open(output_transcript, 'w', encoding='utf-8') as f:
#with open(savedir+"/"+transcriptfile, 'w', encoding='latin-1') as f:
for item in docs:
# write each item
f.write(item.page_content)
print('Done')
return docs
where extract_audio is a function that creates a .wav file out of the .mp4 video file.
However, from
docs = loader.load()
I get the following error:
File ~\anaconda3\lib\site-packages\langchain\document_loaders\generic.py:85 in lazy_load for blob in self.blob_loader.yield_blobs():
AttributeError: 'str' object has no attribute 'yield_blobs'
Can I please get some help?