Spaces:

ayushkanha
/

VoiceBridge

Running

App Files Files Community

ayushkanha committed on Mar 4

Commit

5499c02

verified ·

1 Parent(s): f49ab35

Upload 3 files

Browse files

Files changed (3) hide show

app.py +70 -0
packages.txt +1 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

import math
import os
import tempfile

import speech_recognition as sr
import streamlit as st
from pydub import AudioSegment
def generate_srt(transcriptions):
    """Render transcription segments as the text of a SubRip (.srt) file.

    Args:
        transcriptions: Iterable of ``(start_time, end_time, text)`` tuples.
            Both times are offsets in seconds and may be ints or floats.

    Returns:
        The SRT document as a single string: one cue per segment, numbered
        from 1 in input order, each cue terminated by a blank line. An empty
        input yields an empty string.
    """

    def _timestamp(seconds):
        # Work in whole milliseconds so fractional seconds are kept in the
        # ",mmm" field instead of always being written as ",000".
        # Negative offsets are clamped to zero; SRT has no negative times.
        total_ms = max(0, round(seconds * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

    # Join once rather than growing a string with "+=" per cue.
    return "".join(
        f"{index}\n{_timestamp(start)} --> {_timestamp(end)}\n{text}\n\n"
        for index, (start, end, text) in enumerate(transcriptions, 1)
    )
# --- Streamlit page: upload audio, transcribe it in fixed-size chunks with
# Google's speech recognition, and offer the result as text and as an SRT file.
st.title("🎙️ Audio to Text Transcription App (Google)")
uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])
if uploaded_file is not None:
    # pydub/ffmpeg and sr.AudioFile are given a filesystem path, so the upload
    # is persisted to a temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        file_path = tmpfile.name
        tmpfile.write(uploaded_file.read())

    transcriptions = []
    step = 5  # chunk length in seconds
    api_failed = False
    try:
        # Advertise the upload's real MIME type; it is not necessarily WAV.
        st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

        # Convert in place to 16 kHz mono WAV for the recognizer.
        audio = AudioSegment.from_file(file_path)
        audio = audio.set_frame_rate(16000).set_channels(1)
        audio.export(file_path, format="wav")

        recognizer = sr.Recognizer()

        # Open once just to learn the total length in seconds (a float).
        with sr.AudioFile(file_path) as source:
            duration = source.DURATION

        # ceil() so a trailing partial second (or a clip shorter than one
        # second) still gets a chunk instead of being dropped.
        for start in range(0, math.ceil(duration), step):
            # A fresh AudioFile per chunk keeps `offset` relative to the start
            # of the file. Nothing else may read from the stream before
            # record(): any earlier read would shift the recorded audio
            # relative to the timestamps stored below.
            with sr.AudioFile(file_path) as chunk_source:
                audio_data = recognizer.record(chunk_source, offset=start, duration=step)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                continue  # Nothing intelligible in this chunk; skip it
            except sr.RequestError:
                st.error("❌ Google API error. Check internet connection.")
                api_failed = True
                break
            # Store chunk timestamps, clamping the last chunk to the real end.
            transcriptions.append((start, min(start + step, duration), text))
    finally:
        # Always remove the temporary audio file, even if conversion or
        # recognition raised.
        os.remove(file_path)

    # Generate SRT content
    srt_text = generate_srt(transcriptions)

    # Only report success when every chunk could be sent to the API; partial
    # results gathered before a failure are still shown below.
    if not api_failed:
        st.success("✅ Transcription Complete!")
    st.text_area("Transcribed Text:", "\n".join([t[2] for t in transcriptions]), height=200)

    # Provide the SRT for download straight from memory. Encoding explicitly
    # as UTF-8 keeps non-ASCII transcripts intact regardless of the platform's
    # default encoding, and no temporary .srt file needs cleaning up.
    st.download_button(
        "📥 Download SRT File",
        srt_text.encode("utf-8"),
        file_name="transcription.srt",
        mime="text/plain",
    )

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+speechrecognition
+pydub