bluenevus committed (verified)
Commit 60d3e8d · 1 Parent(s): 26cf8bb

Update app.py

Files changed (1): app.py (+4 -3)
app.py CHANGED
@@ -27,7 +27,7 @@ whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_na
 # Load the Qwen model and tokenizer
 qwen_model_name = "Qwen/Qwen2.5-3B-Instruct"
 qwen_tokenizer = AutoTokenizer.from_pretrained(qwen_model_name, trust_remote_code=True)
-qwen_model = AutoModelForCausalLM.from_pretrained(qwen_model_name, trust_remote_code=True).to(device)
+qwen_model = AutoModelForCausalLM.from_pretrained(qwen_model_name, trust_remote_code=True, torch_dtype=torch.float16).to(device)
 
 def download_audio_from_url(url):
     try:
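The `torch_dtype=torch.float16` argument makes `from_pretrained` load the weights in half precision, roughly halving memory use (about 6 GB rather than 12 GB for a 3B-parameter model). A minimal sketch of the load, assuming a CUDA device is available (fp16 inference on CPU is poorly supported):

```python
# Sketch of the half-precision load introduced by this hunk; the device
# selection is an assumption (the rest of app.py is not shown here).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

qwen_model_name = "Qwen/Qwen2.5-3B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

qwen_tokenizer = AutoTokenizer.from_pretrained(qwen_model_name, trust_remote_code=True)
qwen_model = AutoModelForCausalLM.from_pretrained(
    qwen_model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # 2 bytes per weight instead of 4
).to(device)
```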
@@ -85,7 +85,7 @@ def separate_speakers(transcription):
     print("Starting speaker separation...")
     prompt = f"""Analyze the following transcribed text and separate it into different speakers. Identify potential speaker changes based on context, content shifts, or dialogue patterns. Format the output as follows:
 
-1. Label speakers as "Speaker 1", "Speaker 2", etc.
+1. Label speakers as "Speaker 1", "Speaker 2", etc. You will have to use dialog context to asume which speaker is saying their dialog as that isn't in the text.
 2. Start each speaker's text on a new line beginning with their label.
 3. Separate different speakers' contributions with a blank line.
 4. If the same speaker continues, do not insert a blank line or repeat the speaker label.
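For reference, output that follows these formatting rules would look like the sketch below (the dialogue itself is hypothetical, not taken from the app):

```
Speaker 1: Thanks everyone for joining. Let's get started with the agenda.

Speaker 2: Sounds good. I had one question about the transcript quality.
Did the latest run finish overnight?

Speaker 1: It did, and the results looked fine.
```

Note how Speaker 2's continuation stays on the next line with no blank line and no repeated label, per rule 4.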
@@ -96,6 +96,7 @@ Now, please process the following transcribed text:
 """
 
     inputs = qwen_tokenizer(prompt, return_tensors="pt").to(device)
+    inputs = {k: v.to(torch.float16) for k, v in inputs.items()}  # Convert inputs to float16
     with torch.no_grad():
         outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
     result = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
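One caveat on the new cast: the tokenizer returns integer tensors here (`input_ids` and `attention_mask`), and converting token IDs to float16 will make the embedding lookup inside `generate()` raise a dtype error. Since the model itself already computes in fp16 after the hunk above, the inputs need no cast at all; if a defensive cast is wanted, a dtype-safe variant would look like this (a sketch, not the committed code):

```python
# Dtype-safe variant: cast only floating-point tensors, leaving integer
# token IDs and attention masks untouched.
inputs = qwen_tokenizer(prompt, return_tensors="pt").to(device)
inputs = {k: v.to(torch.float16) if torch.is_floating_point(v) else v
          for k, v in inputs.items()}
with torch.no_grad():
    outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
```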
@@ -105,7 +106,7 @@ Now, please process the following transcribed text:
 
     print("Speaker separation complete.")
     return processed_text
-
+
 def transcribe_video(url):
     try:
         print(f"Attempting to download audio from URL: {url}")
 