VoiceBridge / app1.py
ayushkanha's picture
Rename app.py to app1.py
8d2899b verified
raw
history blame
2.84 kB
import streamlit as st
import speech_recognition as sr
from pydub import AudioSegment
import tempfile
import os
# Function to format text into SRT format
def generate_srt(transcriptions):
srt_content = ""
for i, (start_time, end_time, text) in enumerate(transcriptions, 1):
start_srt = f"{int(start_time // 3600):02}:{int((start_time % 3600) // 60):02}:{int(start_time % 60):02},000"
end_srt = f"{int(end_time // 3600):02}:{int((end_time % 3600) // 60):02}:{int(end_time % 60):02},000"
srt_content += f"{i}\n{start_srt} --> {end_srt}\n{text}\n\n"
return srt_content
st.title("πŸŽ™οΈ Audio to Text Transcription App (Google)")
uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])
if uploaded_file is not None:
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
file_path = tmpfile.name
tmpfile.write(uploaded_file.read())
st.audio(uploaded_file, format="audio/wav")
# Convert to WAV
audio = AudioSegment.from_file(file_path)
audio = audio.set_frame_rate(16000).set_channels(1)
audio.export(file_path, format="wav")
recognizer = sr.Recognizer()
transcriptions = []
step = 5
with sr.AudioFile(file_path) as source:
duration = int(source.DURATION) # Get total duration in seconds
# Process in 5-second chunks
for i in range(0, duration, step):
source = sr.AudioFile(file_path) # Reopen file for each iteration
with source as audio_source:
recognizer.adjust_for_ambient_noise(audio_source, duration=0.5)
audio_data = recognizer.record(audio_source, offset=i, duration=step) # Use offset correctly
try:
text = recognizer.recognize_google(audio_data)
transcriptions.append((i, min(i + step, duration), text)) # Store chunk timestamps
except sr.UnknownValueError:
continue # Skip if the chunk is not recognized
except sr.RequestError:
st.error("❌ Google API error. Check internet connection.")
break
# Generate SRT content
srt_text = generate_srt(transcriptions)
# Save SRT file
srt_path = file_path.replace(".wav", ".srt")
with open(srt_path, "w") as srt_file:
srt_file.write(srt_text)
st.success("βœ… Transcription Complete!")
st.text_area("Transcribed Text:", "\n".join([t[2] for t in transcriptions]), height=200)
# Provide SRT file for download
with open(srt_path, "rb") as srt_file:
st.download_button("πŸ“₯ Download SRT File", srt_file, file_name="transcription.srt", mime="text/plain")
# Clean up
os.remove(file_path)
os.remove(srt_path)