bluenevus committed (verified)
Commit c982392 · 1 Parent(s): dd906ec

Update app.py

Files changed (1)
  1. app.py +24 -54
app.py CHANGED

@@ -1,6 +1,6 @@
  import io
  import torch
- from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
  import requests
  from bs4 import BeautifulSoup
  import tempfile
@@ -17,7 +17,7 @@ import librosa
  import numpy as np

  # Set up logging
- logging.basicConfig(level=logging.DEBUG)
+ logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

  print("Script started")
@@ -31,11 +31,6 @@ whisper_model_name = "openai/whisper-small"
  whisper_processor = WhisperProcessor.from_pretrained(whisper_model_name)
  whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name).to(device)

- # Load the Qwen model and tokenizer
- qwen_model_name = "Qwen/Qwen2.5-1.5B-Instruct"
- qwen_tokenizer = AutoTokenizer.from_pretrained(qwen_model_name, trust_remote_code=True)
- qwen_model = AutoModelForCausalLM.from_pretrained(qwen_model_name, trust_remote_code=True).to(device)
-
  def download_audio_from_url(url):
      try:
          if "youtube.com" in url or "youtu.be" in url:
@@ -92,40 +87,11 @@ def transcribe_audio(audio_file):
          full_transcription = " ".join(transcriptions)
          logger.info(f"Transcription complete. Full transcription length: {len(full_transcription)} characters")

-         logger.info("Applying speaker separation using Qwen...")
-         separated_transcript = separate_speakers(full_transcription)
-
-         return separated_transcript
+         return full_transcription
      except Exception as e:
          logger.error(f"Error in transcribe_audio: {str(e)}")
          raise

- def separate_speakers(transcription):
-     logger.info("Starting speaker separation...")
-     prompt = f"""Analyze the following transcribed text and separate it into different speakers. Identify potential speaker changes based on context, content shifts, or dialogue patterns. Format the output as follows:
-
- 1. Label speakers as "Speaker 1", "Speaker 2", etc.
- 2. Start each speaker's text on a new line beginning with their label.
- 3. Separate different speakers' contributions with a blank line.
- 4. If the same speaker continues, do not insert a blank line or repeat the speaker label.
- 5. Do not include any additional explanations or metadata.
-
- Now, please process the following transcribed text:
-
- {transcription}
- """
-
-     inputs = qwen_tokenizer(prompt, return_tensors="pt").to(device)
-     with torch.no_grad():
-         outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
-     result = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     # Extract only the processed text (remove the instruction part)
-     processed_text = result.split("Now, please process the following transcribed text:")[-1].strip()
-
-     logger.info("Speaker separation complete.")
-     return processed_text
-
  def transcribe_video(url):
      try:
          logger.info(f"Attempting to download audio from URL: {url}")
@@ -141,18 +107,7 @@ def transcribe_video(url):
          if len(transcript) < 10:
              raise ValueError("Transcription too short, possibly failed")

-         logger.info("Separating speakers...")
-         try:
-             diarized_transcript = separate_speakers(transcript)
-             logger.info(f"Speaker separation complete. Result length: {len(diarized_transcript)} characters")
-             if len(diarized_transcript) < 10:
-                 logger.warning("Speaker separation result too short, using original transcript")
-                 return transcript
-             return diarized_transcript
-         except Exception as e:
-             logger.error(f"Error during speaker separation: {str(e)}")
-             logger.info("Returning original transcript without speaker separation")
-             return transcript
+         return transcript
      except Exception as e:
          error_message = f"An error occurred: {str(e)}"
          logger.error(error_message)
@@ -163,8 +118,8 @@ app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
  app.layout = dbc.Container([
      dbc.Row([
          dbc.Col([
-             html.H1("Video Transcription with Speaker Separation", className="text-center mb-4"),
-             html.Div("If you can see this, the app is working!", className="text-center mb-4"),  # Debug element
+             html.H1("Video Transcription", className="text-center mb-4"),
+             html.Div("If you can see this, the app is working!", className="text-center mb-4"),
              dbc.Card([
                  dbc.CardBody([
                      dbc.Input(id="video-url", type="text", placeholder="Enter video URL"),
@@ -191,12 +146,28 @@ def update_transcription(n_clicks, url):
      if not url:
          raise PreventUpdate

-     transcript = transcribe_video(url)
+     def transcribe():
+         try:
+             transcript = transcribe_video(url)
+             return transcript
+         except Exception as e:
+             logger.exception("Error in transcription:")
+             return f"An error occurred: {str(e)}"
+
+     # Run transcription in a separate thread
+     thread = threading.Thread(target=transcribe)
+     thread.start()
+     thread.join(timeout=600)  # 10 minutes timeout
+
+     if thread.is_alive():
+         return "Transcription timed out after 10 minutes", {'display': 'none'}
+
+     transcript = getattr(thread, 'result', "Transcription failed")

      if transcript and not transcript.startswith("An error occurred"):
          return dbc.Card([
              dbc.CardBody([
-                 html.H5("Transcription Result with Speaker Separation"),
+                 html.H5("Transcription Result"),
                  html.Pre(transcript, style={"white-space": "pre-wrap", "word-wrap": "break-word"})
              ])
          ]), {'display': 'block'}
@@ -209,7 +180,6 @@ def update_transcription(n_clicks, url):
      State("transcription-output", "children"),
      prevent_initial_call=True
  )
-
  def download_transcript(n_clicks, transcription_output):
      if not transcription_output:
          raise PreventUpdate
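
Note on the new callback code: a plain threading.Thread never stores its target's return value, so getattr(thread, 'result', "Transcription failed") will always return the fallback string even when transcription succeeds, and the diff does not show an "import threading" line being added, which the module needs if it is not already imported elsewhere. Below is a minimal sketch of one way to keep the 10-minute timeout while actually capturing the worker's result, assuming the transcribe_video function from this file; the run_with_timeout helper and the module-level executor are hypothetical names, not part of the commit.

from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeout

# Hypothetical module-level worker pool, reused across callbacks.
executor = ThreadPoolExecutor(max_workers=1)

def run_with_timeout(url, timeout=600):
    """Run transcribe_video(url) in a worker thread and return its result,
    or a timeout message if it does not finish within `timeout` seconds."""
    future = executor.submit(transcribe_video, url)
    try:
        # result() blocks for at most `timeout` seconds and re-raises
        # any exception raised inside the worker.
        return future.result(timeout=timeout)
    except FutureTimeout:
        # The worker keeps running in the background; we only stop waiting.
        return "Transcription timed out after 10 minutes"
    except Exception as e:
        return f"An error occurred: {str(e)}"

With a helper like this, the callback could call transcript = run_with_timeout(url) in place of the thread/join/getattr block and keep the rest of its return logic unchanged.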