How can I sync my audio and video together when making a clipping software with Python?

import cv2
import numpy as np
import pyautogui
import pyaudio
import wave
import os
from moviepy.editor import VideoClip, AudioFileClip
from collections import deque
from keyboard import is_pressed
from datetime import datetime

# Constants
CLIP_DURATION = 120  # Total duration of the clip in seconds
FPS = 30  # Frames per second
SHORTCUT_KEY = "ctrl+shift+c"  # Keyboard shortcut to start/stop recording
AUDIO_CHANNELS = 2  # Stereo audio
AUDIO_SAMPLE_RATE = 44100  # Sample rate in Hz
AUDIO_BUFFER_SIZE = 2048  # Adjust buffer size based on audio distortion

def on_shortcut_press():
    if is_pressed(SHORTCUT_KEY):
        save_clip()
        print(f"Recording stopped at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

def save_clip(frames, audio_frames, start_time):
    print("Saving clip...")
    
    if len(frames) < FPS * 2:  # Check if at least 2 seconds of frames are available
        print("Insufficient frames collected. Aborting clip save.")
        return

    # Set up PyAudio
    audio = pyaudio.PyAudio()
    audio_stream = audio.open(format=pyaudio.paInt16,
                              channels=AUDIO_CHANNELS,
                              rate=AUDIO_SAMPLE_RATE,
                              input=True,
                              frames_per_buffer=AUDIO_BUFFER_SIZE)

    # Write frames to video
    video_clip = VideoClip(make_frame=lambda t: frames[int(t * FPS)], duration=len(frames) / FPS)
    video_name = f"clip_{start_time.year}-{start_time.month}-{start_time.day}_{start_time.hour}-{start_time.minute}-{start_time.second}.mp4"
    video_clip.write_videofile(video_name, fps=FPS, codec='libx264')

    # Write audio to file
    audio_filename = f"audio_{start_time.year}-{start_time.month}-{start_time.day}_{start_time.hour}-{start_time.minute}-{start_time.second}.wav"
    with wave.open(audio_filename, "wb") as wavefile:
        wavefile.setnchannels(AUDIO_CHANNELS)
        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wavefile.setframerate(AUDIO_SAMPLE_RATE)
        audio_frames_data = b''.join(chunk for chunk, _ in audio_frames)
        wavefile.writeframes(audio_frames_data)

    # Combine audio with video
    final_video_clip = video_clip.set_audio(AudioFileClip(audio_filename))
    final_clip_name = f"final_clip_{start_time.year}-{start_time.month}-{start_time.day}_{start_time.hour}-{start_time.minute}-{start_time.second}.mp4"
    final_video_clip.write_videofile(final_clip_name, codec='libx264', audio_codec='aac', fps=FPS)
    print(f"Final clip saved as {final_clip_name}")

    # Clean up
    os.remove(video_name)
    os.remove(audio_filename)
    audio_stream.stop_stream()
    audio_stream.close()
    audio.terminate()

def main(audio_stream):
    print("Starting main loop...")
    frames = deque(maxlen=CLIP_DURATION * FPS)
    audio_frames = deque(maxlen=int(CLIP_DURATION * AUDIO_SAMPLE_RATE / AUDIO_BUFFER_SIZE))
    start_time = datetime.now()  # Reference time for the start of recording

    while True:
        # Capture video frame
        frame = np.array(pyautogui.screenshot())
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Capture audio frame
        audio_data = audio_stream.read(AUDIO_BUFFER_SIZE, exception_on_overflow=False)

        # Timestamp frames
        frame_timestamp = datetime.now() - start_time
        audio_timestamp = len(audio_frames) / (AUDIO_SAMPLE_RATE / AUDIO_BUFFER_SIZE)

        # Append frames to queues
        frames.append(frame)
        audio_frames.append((audio_data, audio_timestamp))

        # Check if it's time to save the clip
        if (datetime.now() - start_time).total_seconds() >= CLIP_DURATION or is_pressed(SHORTCUT_KEY):
            save_clip(frames, audio_frames, start_time)
            # Clear frames for the next clip
            frames.clear()
            audio_frames.clear()
            start_time = datetime.now()
            audio_stream = audio.open(format=pyaudio.paInt16,
                                      channels=AUDIO_CHANNELS,
                                      rate=AUDIO_SAMPLE_RATE,
                                      input=True,
                                      frames_per_buffer=AUDIO_BUFFER_SIZE)

if __name__ == "__main__":
    audio = pyaudio.PyAudio()
    audio_stream = audio.open(format=pyaudio.paInt16,
                              channels=AUDIO_CHANNELS,
                              rate=AUDIO_SAMPLE_RATE,
                              input=True,
                              frames_per_buffer=AUDIO_BUFFER_SIZE)
    main(audio_stream)

Above is my current code. Everything runs and clips are saved, but the video ends up ahead of the audio and the two tracks are not synced at all. Any help would be greatly appreciated.
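For reference, a quick timing check like the one below (a separate diagnostic sketch, not part of the clipping code; the capture count of 60 is arbitrary) shows whether each pyautogui.screenshot() call takes longer than 1/FPS. If it does, the main loop collects fewer than 30 frames per real second, yet the clip is still written at a fixed 30 fps, so the video plays back faster than real time and runs ahead of the audio.

import time

import cv2
import numpy as np
import pyautogui

FPS = 30
N = 60  # number of test captures

start = time.perf_counter()
for _ in range(N):
    frame = cv2.cvtColor(np.array(pyautogui.screenshot()), cv2.COLOR_BGR2RGB)
elapsed = time.perf_counter() - start

print(f"Average capture time: {elapsed / N * 1000:.1f} ms per frame")
print(f"Actual capture rate: {N / elapsed:.1f} fps (clip is written at {FPS} fps)")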

I tried to make clipping software and expected it to work; it does run, but the audio and video are not synced. I know that if I change the buffer size to anything besides 2048 the audio gets distorted, but beyond that I am completely stuck.
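One possible direction, sketched below under the assumption that the blocking screenshot call is what throttles the audio reads (audio_worker, stop_event, and TEST_SECONDS are illustrative names, not from the code above): read the audio on its own thread so the stream is serviced continuously, and derive the effective fps from wall-clock time so the video track ends up the same length as the audio track.

import threading
import time
from collections import deque

import cv2
import numpy as np
import pyaudio
import pyautogui

AUDIO_CHANNELS = 2
AUDIO_SAMPLE_RATE = 44100
AUDIO_BUFFER_SIZE = 2048
TEST_SECONDS = 5  # short test capture

audio_chunks = deque()
stop_event = threading.Event()

def audio_worker(stream):
    # Read fixed-size buffers back to back so the stream is never starved,
    # no matter how long each screenshot takes on the main thread.
    while not stop_event.is_set():
        audio_chunks.append(stream.read(AUDIO_BUFFER_SIZE, exception_on_overflow=False))

pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16, channels=AUDIO_CHANNELS,
                 rate=AUDIO_SAMPLE_RATE, input=True,
                 frames_per_buffer=AUDIO_BUFFER_SIZE)
worker = threading.Thread(target=audio_worker, args=(stream,), daemon=True)
worker.start()

frames = []
t0 = time.perf_counter()
while time.perf_counter() - t0 < TEST_SECONDS:
    frames.append(cv2.cvtColor(np.array(pyautogui.screenshot()), cv2.COLOR_BGR2RGB))
elapsed = time.perf_counter() - t0

stop_event.set()
worker.join()  # the in-flight read (~46 ms at 2048 samples) finishes, then the loop exits
stream.stop_stream()
stream.close()
pa.terminate()

# The video's real rate is frames / elapsed, not the assumed 30 fps; writing the
# clip at this effective fps keeps the video and audio tracks the same length.
effective_fps = len(frames) / elapsed
audio_seconds = len(audio_chunks) * AUDIO_BUFFER_SIZE / AUDIO_SAMPLE_RATE
print(f"{len(frames)} frames in {elapsed:.2f}s -> effective fps {effective_fps:.1f}")
print(f"Audio captured: {audio_seconds:.2f}s")

If the distortion at other buffer sizes comes from the stream not being read fast enough, decoupling the reads like this may also make the buffer size less sensitive, though that is only a guess.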
