Spaces:
Running
Running
Upload 3 files
Browse files- app.py +70 -0
- packages.txt +1 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import speech_recognition as sr
|
3 |
+
from pydub import AudioSegment
|
4 |
+
import tempfile
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Function to format text into SRT format
|
8 |
+
def generate_srt(transcriptions):
    """Render timed transcription chunks as SubRip (SRT) text.

    Args:
        transcriptions: Iterable of ``(start_time, end_time, text)`` tuples,
            with both times expressed in seconds (int or float).

    Returns:
        The SRT document as a single string. Cues are numbered from 1 and
        each cue is followed by a blank line. An empty input yields ``""``.
    """

    def _timestamp(seconds):
        # Convert to whole milliseconds first so that fractional seconds are
        # kept in the ",mmm" field instead of being truncated to ",000".
        total_ms = int(round(seconds * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

    # join() avoids rebuilding the growing string on every cue.
    return "".join(
        f"{index}\n{_timestamp(start_time)} --> {_timestamp(end_time)}\n{text}\n\n"
        for index, (start_time, end_time, text) in enumerate(transcriptions, 1)
    )
|
15 |
+
|
16 |
+
st.title("🎙️ Audio to Text Transcription App (Google)")

uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])

if uploaded_file is not None:
    # Persist the upload to disk so pydub and speech_recognition can open it
    # by path. getvalue() leaves the upload's read position untouched, so the
    # same object can still be handed to st.audio below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        file_path = tmpfile.name
        tmpfile.write(uploaded_file.getvalue())
    # splitext only touches the extension, unlike str.replace which would
    # also rewrite a ".wav" occurring elsewhere in the path.
    srt_path = os.path.splitext(file_path)[0] + ".srt"

    try:
        # Advertise the upload's real MIME type (mp3/m4a are not WAV); fall
        # back to the previous hard-coded value if the browser sent none.
        st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

        # Convert to 16 kHz mono WAV, overwriting the temporary file in place.
        audio = AudioSegment.from_file(file_path)
        audio = audio.set_frame_rate(16000).set_channels(1)
        audio.export(file_path, format="wav")

        recognizer = sr.Recognizer()
        transcriptions = []
        step = 5  # chunk length in seconds
        request_failed = False

        with sr.AudioFile(file_path) as source:
            # Keep the fractional part so a trailing partial second (or a clip
            # shorter than one second) is still processed.
            duration = source.DURATION
            # Calibrate once, on a stream that is not used for recording.
            # Calibration reads audio from the stream, so doing it on the
            # recording stream would shift every chunk away from its
            # timestamp and skip the start of the file.
            recognizer.adjust_for_ambient_noise(source, duration=0.5)

        # Process in 5-second chunks; the file is reopened per chunk so that
        # `offset` is always measured from the beginning of the audio.
        start = 0
        while start < duration:
            with sr.AudioFile(file_path) as audio_source:
                audio_data = recognizer.record(audio_source, offset=start, duration=step)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                pass  # Nothing intelligible in this chunk; move on.
            except sr.RequestError:
                st.error("❌ Google API error. Check internet connection.")
                request_failed = True
                break
            else:
                # Store chunk timestamps alongside the recognised text.
                transcriptions.append((start, min(start + step, duration), text))
            start += step

        # Generate SRT content
        srt_text = generate_srt(transcriptions)

        # Save SRT file; explicit UTF-8 so non-ASCII transcripts do not depend
        # on the platform's default encoding.
        with open(srt_path, "w", encoding="utf-8") as srt_file:
            srt_file.write(srt_text)

        # Only report success when the loop was not aborted by an API error;
        # whatever was transcribed before the failure is still shown below.
        if not request_failed:
            st.success("✅ Transcription Complete!")
        st.text_area("Transcribed Text:", "\n".join(t[2] for t in transcriptions), height=200)

        # Provide SRT file for download
        with open(srt_path, "rb") as srt_file:
            st.download_button("📥 Download SRT File", srt_file, file_name="transcription.srt", mime="text/plain")
    finally:
        # Clean up even when decoding or recognition raised, so temporary
        # files do not accumulate on the host.
        for path in (file_path, srt_path):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
speechrecognition
|
3 |
+
pydub
|
4 |
+
|