ayushkanha committed on
Commit
5499c02
·
verified ·
1 Parent(s): f49ab35

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +70 -0
  2. packages.txt +1 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import speech_recognition as sr
3
+ from pydub import AudioSegment
4
+ import tempfile
5
+ import os
6
+
7
def _format_srt_timestamp(seconds):
    """Return *seconds* formatted as an SRT timestamp ``HH:MM:SS,mmm``."""
    # Work in whole milliseconds so rounding carries correctly into the
    # seconds/minutes/hours fields (e.g. 59.9996 s -> 00:01:00,000).
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def generate_srt(transcriptions):
    """Render transcription segments as SubRip (SRT) subtitle text.

    Args:
        transcriptions: Iterable of ``(start_time, end_time, text)`` tuples,
            with both times given in seconds (int or float).

    Returns:
        A string with one numbered SRT cue per segment, numbered from 1 in
        iteration order. Each cue is followed by a blank line. An empty input
        yields an empty string.
    """
    # Fractional seconds are preserved as milliseconds instead of always
    # emitting ",000"; joining once avoids repeated string concatenation.
    return "".join(
        f"{index}\n"
        f"{_format_srt_timestamp(start_time)} --> {_format_srt_timestamp(end_time)}\n"
        f"{text}\n\n"
        for index, (start_time, end_time, text) in enumerate(transcriptions, 1)
    )
15
+
16
# Streamlit page: upload an audio file, transcribe it in fixed-length chunks
# with Google's speech recognition, and offer the result as text and as SRT.
st.title("🎙️ Audio to Text Transcription App (Google)")

uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])

if uploaded_file is not None:
    # Store the upload under its real extension so the temp file's name
    # matches its content; fall back to ".wav" when the name has none.
    upload_suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as tmpfile:
        source_path = tmpfile.name
        tmpfile.write(uploaded_file.getvalue())
    # Normalised audio goes to a sibling path derived from the unique temp name.
    wav_path = f"{os.path.splitext(source_path)[0]}_mono16k.wav"

    # Advertise the upload's own MIME type rather than always claiming WAV.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

    try:
        # Convert to 16 kHz mono WAV, the form handed to the recognizer.
        audio = AudioSegment.from_file(source_path)
        audio.set_frame_rate(16000).set_channels(1).export(wav_path, format="wav")

        recognizer = sr.Recognizer()
        transcriptions = []
        step = 5  # chunk length in seconds
        api_failed = False

        with sr.AudioFile(wav_path) as probe:
            # Keep the exact (float) duration: truncating it to an int could
            # drop a trailing chunk shorter than one second.
            total_duration = probe.DURATION

        # Process in `step`-second chunks. The file is reopened per chunk so
        # that `offset` is always measured from the start of the audio.
        # NOTE: no adjust_for_ambient_noise() call here. It reads from the
        # stream before `offset` is applied, which shifted every chunk by the
        # calibration time, and the threshold it tunes is not used by record().
        chunk_start = 0
        while chunk_start < total_duration:
            with sr.AudioFile(wav_path) as audio_source:
                audio_data = recognizer.record(
                    audio_source, offset=chunk_start, duration=step
                )
            chunk_end = min(chunk_start + step, total_duration)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                pass  # Nothing intelligible in this chunk; move on.
            except sr.RequestError:
                st.error("❌ Google API error. Check internet connection.")
                api_failed = True
                break
            else:
                # Store the chunk's timestamps alongside its text.
                transcriptions.append((chunk_start, chunk_end, text))
            chunk_start += step

        # Generate SRT content
        srt_text = generate_srt(transcriptions)

        # Only claim success when every chunk was actually sent to the API.
        if not api_failed:
            st.success("✅ Transcription Complete!")
        st.text_area("Transcribed Text:", "\n".join([t[2] for t in transcriptions]), height=200)

        # Provide the SRT for download straight from memory; no intermediate
        # file (and therefore no platform-dependent text encoding) is needed.
        st.download_button("📥 Download SRT File", srt_text, file_name="transcription.srt", mime="text/plain")
    finally:
        # Clean up temp files even when decoding or transcription fails.
        for temp_path in (source_path, wav_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ speechrecognition
3
+ pydub
4
+