bluenevus committed
Commit 54c226c · verified · 1 Parent(s): 261d49a

Update app.py

Files changed (1): app.py +8 -4
app.py CHANGED
@@ -27,7 +27,7 @@ whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_na
 # Load the Qwen model and tokenizer
 qwen_model_name = "Qwen/Qwen2.5-3B-Instruct"
 qwen_tokenizer = AutoTokenizer.from_pretrained(qwen_model_name, trust_remote_code=True)
-qwen_model = AutoModelForCausalLM.from_pretrained(qwen_model_name, trust_remote_code=True, torch_dtype=torch.float16).to(device)
+qwen_model = AutoModelForCausalLM.from_pretrained(qwen_model_name, trust_remote_code=True).to(device)
 
 def download_audio_from_url(url):
     try:
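Without `torch_dtype=torch.float16`, `from_pretrained` loads the weights in the default float32. A minimal sketch of the two loading modes, assuming `transformers` is installed and there is enough memory for the 3B checkpoint (not part of app.py):

```python
# Minimal sketch: default load keeps float32 weights; passing
# torch_dtype=torch.float16 roughly halves memory but mainly pays off on GPU.
import torch
from transformers import AutoModelForCausalLM

name = "Qwen/Qwen2.5-3B-Instruct"

model = AutoModelForCausalLM.from_pretrained(name, trust_remote_code=True)
print(next(model.parameters()).dtype)  # torch.float32 by default

model_fp16 = AutoModelForCausalLM.from_pretrained(
    name, trust_remote_code=True, torch_dtype=torch.float16
)
print(next(model_fp16.parameters()).dtype)  # torch.float16
```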
@@ -86,6 +86,8 @@ def transcribe_audio(audio_file):
         transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
         print(f"Transcription complete. Length: {len(transcription[0])} characters")
+        if len(transcription[0]) < 10:
+            raise ValueError(f"Transcription too short: {transcription[0]}")
         return transcription[0]
     except Exception as e:
         print(f"Error in transcribe_audio: {str(e)}")
@@ -95,7 +97,7 @@ def separate_speakers(transcription):
     print("Starting speaker separation...")
     prompt = f"""Analyze the following transcribed text and separate it into different speakers. Identify potential speaker changes based on context, content shifts, or dialogue patterns. Format the output as follows:
 
-1. Label speakers as "Speaker 1", "Speaker 2", etc. You will have to use dialogue context to assume which speaker is saying their dialogue, as that isn't in the text.
+1. Label speakers as "Speaker 1", "Speaker 2", etc.
 2. Start each speaker's text on a new line beginning with their label.
 3. Separate different speakers' contributions with a blank line.
 4. If the same speaker continues, do not insert a blank line or repeat the speaker label.
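For illustration only (the dialogue below is invented), output that follows the four rules in the prompt would look like:

```
Speaker 1: Thanks for joining us. Can you walk us through the results?

Speaker 2: Sure. The new pipeline cut our error rate roughly in half.
It also ran faster on the same hardware.

Speaker 1: That's a big improvement.
```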
@@ -106,7 +108,6 @@ Now, please process the following transcribed text:
 """
 
     inputs = qwen_tokenizer(prompt, return_tensors="pt").to(device)
-    inputs = {k: v.to(torch.float16) for k, v in inputs.items()}  # Convert inputs to float16
     with torch.no_grad():
         outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
     result = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
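The deleted comprehension cast every tensor in `inputs`, including the integer `input_ids`, to float16, and embedding lookups require integer indices. A standalone sketch of that failure mode (not app.py code):

```python
# Why casting input_ids to float16 breaks generation:
# nn.Embedding requires integer (Long/Int) indices.
import torch

ids = torch.tensor([[1, 2, 3]])        # int64, as a tokenizer returns
emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=4)

print(emb(ids).shape)                  # works: torch.Size([1, 3, 4])
try:
    emb(ids.to(torch.float16))         # what the removed line produced
except RuntimeError as err:
    print(f"fails as expected: {err}")
```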
@@ -116,7 +117,7 @@ Now, please process the following transcribed text:
 
     print("Speaker separation complete.")
     return processed_text
-
+
 def transcribe_video(url):
     try:
         print(f"Attempting to download audio from URL: {url}")
@@ -129,6 +130,9 @@ def transcribe_video(url):
 
         os.unlink(temp_audio.name)
 
+        if len(transcript) < 10:
+            raise ValueError("Transcription too short, possibly failed")
+
         print("Separating speakers...")
         separated_transcript = separate_speakers(transcript)
 
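A hedged reconstruction of the surrounding flow, assembled from the context lines above (the helper functions are the ones defined earlier in app.py; the exact function body is assumed, not verbatim):

```python
# Hedged reconstruction -- not the verbatim app.py function.
import os

def transcribe_video(url):
    print(f"Attempting to download audio from URL: {url}")
    temp_audio = download_audio_from_url(url)      # defined earlier in app.py
    transcript = transcribe_audio(temp_audio.name)
    os.unlink(temp_audio.name)                     # remove the temp file first ...

    if len(transcript) < 10:                       # ... then reject bad output
        raise ValueError("Transcription too short, possibly failed")

    print("Separating speakers...")
    return separate_speakers(transcript)
```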