husseinelsaadi committed
Commit 4a764f7 · verified · 1 Parent(s): cd89a2a

Update app.py

Files changed (1): app.py (+114, -39)
app.py CHANGED
@@ -1740,45 +1740,72 @@ import json
 from transformers import BarkModel, AutoProcessor
 import torch, gc
 import whisper
+import asyncio
+import threading
+from concurrent.futures import ThreadPoolExecutor
 
 print(torch.cuda.is_available())
 torch.cuda.empty_cache()
 gc.collect()
 
-# Bark TTS
-print("🔍 Loading Bark model...")
-model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
-print("✅ Bark model loaded")
-print("🔍 Loading Bark processor...")
-processor_bark = AutoProcessor.from_pretrained("suno/bark")
-print("✅ Bark processor loaded")
-bark_voice_preset = "v2/en_speaker_12"
-
-def bark_tts(text):
-    print(f"🔍 Synthesizing TTS for: {text}")
-    inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
-    input_ids = inputs["input_ids"].to(model_bark.device)
-    start = time.time()
-    with torch.no_grad():
-        speech_values = model_bark.generate(
-            input_ids=input_ids,
-            do_sample=True,
-            fine_temperature=0.4,
-            coarse_temperature=0.8
-        )
-    print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
-    speech = speech_values.cpu().numpy().squeeze()
-    speech = (speech * 32767).astype(np.int16)
-    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-    wavfile.write(temp_wav.name, 22050, speech)
-    return temp_wav.name
-
-# Whisper STT
-print("🔍 Loading Whisper model...")
-whisper_model = whisper.load_model("base", device="cuda")
-print("✅ Whisper model loaded")
+# Global variables for lazy loading
+model_bark = None
+processor_bark = None
+whisper_model = None
+bark_voice_preset = "v2/en_speaker_9"
+
+# Thread pool for async operations
+executor = ThreadPoolExecutor(max_workers=2)
+
+def load_models_lazy():
+    """Load models only when needed"""
+    global model_bark, processor_bark, whisper_model
+
+    if model_bark is None:
+        print("🔍 Loading Bark model...")
+        model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
+        print("✅ Bark model loaded")
+
+    if processor_bark is None:
+        print("🔍 Loading Bark processor...")
+        processor_bark = AutoProcessor.from_pretrained("suno/bark")
+        print("✅ Bark processor loaded")
+
+    if whisper_model is None:
+        print("🔍 Loading Whisper model...")
+        whisper_model = whisper.load_model("base", device="cuda")
+        print("✅ Whisper model loaded")
+
+def bark_tts_async(text):
+    """Async TTS generation"""
+    def _generate():
+        load_models_lazy()  # Load only when needed
+        print(f"🔍 Synthesizing TTS for: {text}")
+        inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
+        input_ids = inputs["input_ids"].to(model_bark.device)
+        start = time.time()
+        with torch.no_grad():
+            speech_values = model_bark.generate(
+                input_ids=input_ids,
+                do_sample=True,
+                fine_temperature=0.4,
+                coarse_temperature=0.8
+            )
+        print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
+        speech = speech_values.cpu().numpy().squeeze()
+        speech = (speech * 32767).astype(np.int16)
+        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        wavfile.write(temp_wav.name, 22050, speech)
+        return temp_wav.name
+
+    return executor.submit(_generate)
+
 def whisper_stt(audio_path):
-    if not audio_path or not os.path.exists(audio_path): return ""
+    """Lazy loading whisper STT"""
+    if not audio_path or not os.path.exists(audio_path):
+        return ""
+
+    load_models_lazy()  # Load only when needed
     result = whisper_model.transcribe(audio_path)
     return result["text"]
 
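Side note on the hunk above: `threading` is imported but `load_models_lazy` takes no lock, so with `max_workers=2` two executor jobs could pass the `is None` check together and load a model twice. A minimal stand-alone sketch of the same lazy-load + thread-pool pattern with a lock added (all names here are illustrative, not from app.py):

```python
import threading
from concurrent.futures import ThreadPoolExecutor

_model = None
_model_lock = threading.Lock()
executor = ThreadPoolExecutor(max_workers=2)

def load_model_lazy():
    """Load the stand-in 'model' once, on first use; the lock makes check-then-load atomic."""
    global _model
    with _model_lock:
        if _model is None:
            print("loading model...")   # happens once, on the first call only
            _model = object()           # stand-in for an expensive from_pretrained() call
    return _model

def synth_async(text):
    """Submit a slow job to the pool; the caller gets a concurrent.futures.Future immediately."""
    def _generate():
        model = load_model_lazy()
        return f"fake audio for {text!r}"  # stand-in for writing a .wav file
    return executor.submit(_generate)

future = synth_async("hello")
print(future.result())  # .result() blocks this caller only, not the UI thread
```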
@@ -1790,6 +1817,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     user_data = gr.State({})
     interview_state = gr.State({})
     missing_fields_state = gr.State([])
+    tts_future = gr.State(None)  # Store async TTS future
 
     with gr.Column(visible=True) as user_info_section:
         gr.Markdown("## Candidate Information")
@@ -1808,6 +1836,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Column(visible=False) as interview_pre_section:
         pre_interview_greeting_md = gr.Markdown()
        start_interview_final_btn = gr.Button("Start Interview")
+        loading_status = gr.Markdown("", visible=False)
 
     with gr.Column(visible=False) as interview_section:
         gr.Markdown("## Interview in Progress")
@@ -1821,6 +1850,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     def validate_start_btn(cv_file, job_desc):
         return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
+
     cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
     job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
 
@@ -1839,17 +1869,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         else:
             greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
             return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
+
     start_btn.click(process_and_route_initial, [cv_file, job_desc], [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md])
 
     def show_missing(missing):
         if missing is None: missing = []
         return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
+
     missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
 
     def validate_fields(name, role, seniority, skills, missing):
         if not missing: return gr.update(interactive=False)
         all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip()))])
         return gr.update(interactive=all_filled)
+
     for inp in [name_in, role_in, seniority_in, skills_in]:
         inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
 
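Side note: each clause in `validate_fields` is the implication "required → filled" written as `(not required) or filled`. A tiny stand-alone illustration with made-up values:

```python
# Illustration only; `missing` and the field values are made up.
missing = ["name", "skills"]
fields = {"name": "Ada", "job_role": "", "seniority": None, "skills": " "}

# A field passes if it is not required (not in `missing`) or has real content.
all_filled = all(
    (key not in missing) or bool(str(value or "").strip())
    for key, value in fields.items()
)
print(all_filled)  # False: "skills" is required but only whitespace
```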
@@ -1860,14 +1893,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
         greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
         return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
+
     submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
 
-    def start_interview(data):
+    def start_interview_immediate(data):
+        """Start interview immediately, begin TTS generation in background"""
         state = {
             "questions": [], "answers": [], "timings": [], "question_evaluations": [], "answer_evaluations": [],
             "conversation_history": [], "difficulty_adjustment": None, "question_idx": 0, "max_questions": 3,
             "q_start_time": time.time(), "log": []
         }
+
+        # Generate question text first (fast)
         context = ""
         prompt = build_interview_prompt(
             conversation_history=[], user_response="", context=context, job_role=data["job_role"],
@@ -1875,16 +1912,50 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         )
         first_q = groq_llm.predict(prompt)
         q_eval = {"Score": "N/A", "Reasoning": "Skipped to reduce processing time", "Improvements": []}
+
         state["questions"].append(first_q)
         state["question_evaluations"].append(q_eval)
         state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
-        audio_path = bark_tts(first_q)
         state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
-        return state, gr.update(visible=False), gr.update(visible=True), audio_path, f"*Question 1:* {first_q}"
-    start_interview_final_btn.click(start_interview, [user_data], [interview_state, interview_pre_section, interview_section, question_audio, question_text])
+
+        # Start TTS generation in background
+        tts_future_obj = bark_tts_async(first_q)
+
+        # Return immediately with loading message
+        return (state, tts_future_obj,
+                gr.update(visible=False),
+                gr.update(visible=True),
+                gr.update(visible=True, value="🔄 Generating audio..."),
+                gr.update(value=None),
+                f"*Question 1:* {first_q}")
+
+    def check_tts_ready(state, tts_future_obj):
+        """Check if TTS is ready and update audio"""
+        if tts_future_obj and tts_future_obj.done():
+            try:
+                audio_path = tts_future_obj.result()
+                return gr.update(value=audio_path), gr.update(visible=False), None
+            except Exception as e:
+                print(f"TTS Error: {e}")
+                return gr.update(value=None), gr.update(value=f"Error generating audio: {e}"), None
+        else:
+            return gr.update(), gr.update(), tts_future_obj
+
+    start_interview_final_btn.click(
+        start_interview_immediate,
+        [user_data],
+        [interview_state, tts_future, interview_pre_section, interview_section, loading_status, question_audio, question_text]
+    ).then(
+        # Check TTS status every 500ms
+        check_tts_ready,
+        [interview_state, tts_future],
+        [question_audio, loading_status, tts_future],
+        every=0.5
+    )
 
     def transcribe(audio_path):
         return whisper_stt(audio_path)
+
     user_audio_input.change(transcribe, user_audio_input, stt_transcript)
 
     def process_answer(transcript, audio_path, state, data):
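Side note: the `.then(check_tts_ready, ..., every=0.5)` chain re-runs `check_tts_ready` on a timer until the background future reports `done()`, at which point `result()` is safe to call. The same pattern stripped of Gradio, stdlib only (path and timings made up):

```python
import time
from concurrent.futures import ThreadPoolExecutor

def slow_tts_job():
    """Stand-in for Bark generation: sleeps, then 'returns' an audio path."""
    time.sleep(1.5)
    return "/tmp/question.wav"  # made-up path

with ThreadPoolExecutor(max_workers=1) as pool:
    future = pool.submit(slow_tts_job)
    while not future.done():        # non-blocking check, like check_tts_ready's future.done()
        print("🔄 Generating audio...")
        time.sleep(0.5)             # the every=0.5 cadence from the commit
    try:
        print("ready:", future.result())  # safe now; re-raises if the job raised
    except Exception as e:
        print(f"TTS Error: {e}")          # mirrors the except branch above
```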
@@ -1948,7 +2019,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         state["question_evaluations"].append(q_eval)
         state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
         state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
-        audio_path = bark_tts(next_q)
+
+        # Generate TTS asynchronously for next question too
+        audio_future = bark_tts_async(next_q)
+        # For now, we'll wait for it (you can make this async too)
+        audio_path = audio_future.result()
+
         eval_md = f"*Last Answer Eval:* {answer_eval}"
         return state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}", gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=eval_md)
 
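Side note: unlike the first question, this path blocks on `audio_future.result()` with no timeout, so a hung Bark generation would hang the answer handler. `Future.result` accepts a `timeout`; a hedged sketch of a bounded wait (the 120 s figure is an assumption, not from the commit):

```python
import concurrent.futures

def wait_for_audio(audio_future, timeout_s=120):
    """Bound the blocking wait; return None instead of hanging forever."""
    try:
        return audio_future.result(timeout=timeout_s)  # raises TimeoutError when late
    except concurrent.futures.TimeoutError:
        print("⏱️ TTS timed out; continuing without audio")
        return None
```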
@@ -1962,4 +2038,3 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 demo.launch(debug=True)
 
-