ayushkanha committed on
Commit
3f9edb5
·
verified ·
1 Parent(s): ff208e4

Delete app1.py

Browse files
Files changed (1) hide show
  1. app1.py +0 -70
app1.py DELETED
@@ -1,70 +0,0 @@
1
- import streamlit as st
2
- import speech_recognition as sr
3
- from pydub import AudioSegment
4
- import tempfile
5
- import os
6
-
7
- # Function to format text into SRT format
8
def _format_srt_timestamp(seconds):
    """Render a time offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Work in whole milliseconds so fractional seconds are kept and float
    # rounding can never produce an out-of-range field. Negative offsets
    # cannot be expressed in SRT, so they are clamped to zero.
    total_ms = max(0, round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def generate_srt(transcriptions):
    """Format transcription chunks as SubRip (SRT) subtitle text.

    Args:
        transcriptions: Iterable of ``(start_time, end_time, text)`` tuples,
            with both times given in seconds (int or float).

    Returns:
        The SRT document as a single string. Cues are numbered from 1 and
        each cue is followed by a blank line. An empty input yields ``""``.
    """
    cues = [
        f"{index}\n"
        f"{_format_srt_timestamp(start_time)} --> {_format_srt_timestamp(end_time)}\n"
        f"{text}\n\n"
        for index, (start_time, end_time, text) in enumerate(transcriptions, 1)
    ]
    return "".join(cues)
15
-
16
# Streamlit page: upload an audio file, transcribe it in fixed-length chunks
# with Google's speech recognition, and offer the result as text and as SRT.
st.title("🎙️ Audio to Text Transcription App (Google)")

uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])

if uploaded_file is not None:
    # Persist the upload so pydub and speech_recognition can open it by path.
    # getvalue() leaves the upload's read position untouched for st.audio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        file_path = tmpfile.name
        tmpfile.write(uploaded_file.getvalue())

    # Preview with the MIME type reported for the upload instead of always
    # claiming WAV (mp3 and m4a uploads are accepted too).
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

    transcriptions = []
    step = 5  # chunk length in seconds
    api_failed = False

    try:
        # Convert to 16 kHz mono WAV, overwriting the temp file in place.
        audio = AudioSegment.from_file(file_path)
        audio = audio.set_frame_rate(16000).set_channels(1)
        audio.export(file_path, format="wav")

        recognizer = sr.Recognizer()

        with sr.AudioFile(file_path) as source:
            duration = source.DURATION  # total duration in seconds (float)
            # Calibrate once on this separate handle. Calibration reads
            # audio from the stream, so doing it on the handle used for a
            # chunk would shift that chunk by the calibration length.
            recognizer.adjust_for_ambient_noise(source, duration=0.5)

        # Round the duration up so a trailing partial chunk is not skipped.
        whole_seconds = int(duration)
        if duration > whole_seconds:
            whole_seconds += 1

        # Process in `step`-second chunks. The file is reopened for every
        # chunk so that `offset` is always measured from the start.
        for offset in range(0, whole_seconds, step):
            with sr.AudioFile(file_path) as audio_source:
                audio_data = recognizer.record(audio_source, offset=offset, duration=step)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                continue  # Skip chunks in which no speech was recognized
            except sr.RequestError:
                st.error("❌ Google API error. Check internet connection.")
                api_failed = True
                break
            # Store chunk timestamps, clamping the last chunk to the file end.
            transcriptions.append((offset, min(offset + step, duration), text))
    finally:
        # Always remove the temp file, even when conversion or recognition fails.
        os.remove(file_path)

    # Generate SRT content
    srt_text = generate_srt(transcriptions)

    # Do not claim completion after an API failure; any chunks transcribed
    # before the failure are still shown and downloadable.
    if not api_failed:
        st.success("✅ Transcription Complete!")
    st.text_area("Transcribed Text:", "\n".join([t[2] for t in transcriptions]), height=200)

    # Provide the SRT for download straight from memory, encoded as UTF-8 so
    # non-ASCII transcripts do not depend on the platform's default encoding.
    st.download_button(
        "📥 Download SRT File",
        srt_text.encode("utf-8"),
        file_name="transcription.srt",
        mime="text/plain",
    )