husseinelsaadi committed
Commit 57677b5 · verified · 1 parent: be4261f

Update app.py

Files changed (1)
  1. app.py +226 -27
app.py CHANGED

@@ -1857,26 +1857,35 @@ def extract_candidate_details(file_path):
 # )
 
 # demo.launch(debug=True)
-
 import gradio as gr
 import time
 import tempfile
 import numpy as np
 import scipy.io.wavfile as wavfile
 import os
+import json
 import torch
 import whisper
 from transformers import BarkModel, AutoProcessor
 
-# Initialize Bark (TTS)
-model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
+# Clear GPU memory
+torch.cuda.empty_cache()
+
+# Load only essential models
+print("🔁 Loading Bark model...")
+model_bark = BarkModel.from_pretrained("suno/bark")
 processor_bark = AutoProcessor.from_pretrained("suno/bark")
+model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
 bark_voice_preset = "v2/en_speaker_6"
+print("✅ Bark model loaded")
 
-# Initialize Whisper (STT)
+print("🔁 Loading Whisper model...")
 whisper_model = whisper.load_model("base", device="cuda" if torch.cuda.is_available() else "cpu")
+print("✅ Whisper model loaded")
 
 def bark_tts(text):
+    """Convert text to speech using Bark"""
+    print(f"🔁 Synthesizing TTS for: {text}")
     inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
     inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
     speech_values = model_bark.generate(**inputs)

@@ -1887,33 +1896,223 @@ def bark_tts(text):
     return temp_wav.name
 
 def whisper_stt(audio_path):
+    """Convert speech to text using Whisper"""
     if not audio_path or not os.path.exists(audio_path):
         return ""
     result = whisper_model.transcribe(audio_path)
     return result["text"]
 
-# Dummy Groq API stub (replace with actual logic)
-def groq_llm_predict(prompt):
-    return f"[Mock Question] Based on: {prompt}"  # Replace with groq_llm.predict(prompt)
-
-def interview_loop(state, audio_path):
-    transcript = whisper_stt(audio_path)
-    state["conversation"].append({"role": "Candidate", "content": transcript})
-
-    prompt = "\n".join([f"{turn['role']}: {turn['content']}" for turn in state["conversation"]])
-    next_q = groq_llm_predict(prompt)
-    state["conversation"].append({"role": "Interviewer", "content": next_q})
-
-    audio_out = bark_tts(next_q)
-    return state, audio_out, transcript
-
-with gr.Blocks() as demo:
-    state = gr.State({"conversation": []})
-    question_audio = gr.Audio(label="Interviewer's Question", interactive=False, autoplay=True)
-    user_audio_input = gr.Audio(source="microphone", type="filepath", label="Your Answer")
-    transcript_box = gr.Textbox(label="Transcript", interactive=False)
-
-    user_audio_input.change(interview_loop, [state, user_audio_input], [state, question_audio, transcript_box])
+# Simple predefined questions for quick interview
+INTERVIEW_QUESTIONS = [
+    "Tell me about yourself and your background.",
+    "What are your greatest strengths and how do they relate to this role?"
+]
 
-demo.launch(debug=True)
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # State variables
+    interview_state = gr.State({"current_question": 0, "answers": [], "started": False})
+
+    # UI Layout
+    with gr.Column(visible=True) as start_section:
+        gr.Markdown("## Quick Interview Setup")
+        name_input = gr.Textbox(label="Your Name", placeholder="Enter your name")
+        role_input = gr.Textbox(label="Job Role", placeholder="e.g., Software Engineer")
+        start_btn = gr.Button("Start Interview", variant="primary")
+
+    with gr.Column(visible=False) as interview_section:
+        gr.Markdown("## Interview in Progress")
+        question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
+        question_text = gr.Markdown()
+
+        # Audio input for user response
+        user_audio_input = gr.Audio(
+            sources=["microphone"],
+            type="filepath",
+            label="Record Your Answer"
+        )
+
+        # Transcription display
+        transcript_display = gr.Textbox(
+            label="Your Answer (Transcribed)",
+            interactive=False
+        )
+
+        # Control buttons
+        with gr.Row():
+            submit_answer_btn = gr.Button("Submit Answer", variant="primary")
+            next_question_btn = gr.Button("Next Question", visible=False)
+
+        # Progress indicator
+        progress_text = gr.Markdown()
+
+        # Interview summary
+        interview_summary = gr.Markdown(visible=False)
+
+    # Event handlers
+    def start_interview(name, role):
+        """Initialize interview with first question"""
+        if not name.strip() or not role.strip():
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Please fill in both name and role fields."),
+                {"current_question": 0, "answers": [], "started": False, "name": "", "role": ""}
+            )
+
+        # Generate first question audio
+        question_text = INTERVIEW_QUESTIONS[0]
+        audio_path = bark_tts(question_text)
+
+        state = {
+            "current_question": 0,
+            "answers": [],
+            "started": True,
+            "name": name,
+            "role": role
+        }
+
+        return (
+            gr.update(visible=False),  # start_section
+            gr.update(visible=True),   # interview_section
+            audio_path,                # question_audio
+            f"**Question 1:** {question_text}",  # question_text
+            gr.update(value=f"Question 1 of {len(INTERVIEW_QUESTIONS)}"),  # progress_text
+            state
+        )
+
+    def transcribe_audio(audio_path):
+        """Transcribe user's audio input"""
+        if not audio_path:
+            return ""
+        return whisper_stt(audio_path)
+
+    def submit_answer(transcript, state):
+        """Process submitted answer and prepare for next question"""
+        if not transcript.strip():
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Please record an answer before submitting."),
+                gr.update(),
+                state
+            )
+
+        # Store the answer
+        state["answers"].append({
+            "question": INTERVIEW_QUESTIONS[state["current_question"]],
+            "answer": transcript,
+            "timestamp": time.time()
+        })
+
+        current_q = state["current_question"]
+
+        # Check if this was the last question
+        if current_q >= len(INTERVIEW_QUESTIONS) - 1:
+            # Interview complete
+            summary = generate_interview_summary(state)
+            return (
+                gr.update(visible=False),  # submit_answer_btn
+                gr.update(visible=False),  # next_question_btn
+                gr.update(value=None),     # question_audio
+                gr.update(value="**Interview Complete!**"),  # question_text
+                gr.update(value="Interview finished. Thank you!"),  # progress_text
+                gr.update(visible=True, value=summary),  # interview_summary
+                state
+            )
+        else:
+            # Show next question button
+            return (
+                gr.update(visible=False),  # submit_answer_btn
+                gr.update(visible=True),   # next_question_btn
+                gr.update(),               # question_audio
+                gr.update(),               # question_text
+                gr.update(value=f"Answer submitted! Ready for question {current_q + 2}?"),  # progress_text
+                gr.update(),               # interview_summary
+                state
+            )
+
+    def next_question(state):
+        """Move to next question"""
+        state["current_question"] += 1
+        current_q = state["current_question"]
+
+        if current_q < len(INTERVIEW_QUESTIONS):
+            question_text = INTERVIEW_QUESTIONS[current_q]
+            audio_path = bark_tts(question_text)
+
+            return (
+                gr.update(visible=True),   # submit_answer_btn
+                gr.update(visible=False),  # next_question_btn
+                audio_path,                # question_audio
+                f"**Question {current_q + 1}:** {question_text}",  # question_text
+                gr.update(value=f"Question {current_q + 1} of {len(INTERVIEW_QUESTIONS)}"),  # progress_text
+                gr.update(value=None),     # user_audio_input (clear)
+                gr.update(value=""),       # transcript_display (clear)
+                state
+            )
+        else:
+            # This shouldn't happen, but handle gracefully
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Interview Complete!"),
+                gr.update(value="All questions answered."),
+                gr.update(),
+                gr.update(),
+                state
+            )
+
+    def generate_interview_summary(state):
+        """Generate final interview summary"""
+        summary = f"# Interview Summary\n\n"
+        summary += f"**Candidate:** {state['name']}\n"
+        summary += f"**Role:** {state['role']}\n"
+        summary += f"**Date:** {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
+
+        for i, qa in enumerate(state['answers']):
+            summary += f"### Question {i + 1}\n"
+            summary += f"**Q:** {qa['question']}\n\n"
+            summary += f"**A:** {qa['answer']}\n\n"
+            summary += "---\n\n"
+
+        # Save to file
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        log_file = f"interview_log_{timestamp}.json"
+        with open(log_file, "w", encoding="utf-8") as f:
+            json.dump(state, f, indent=2, ensure_ascii=False)
+
+        summary += f"*Interview log saved as {log_file}*"
+        return summary
+
+    # Wire up events
+    start_btn.click(
+        start_interview,
+        inputs=[name_input, role_input],
+        outputs=[start_section, interview_section, question_audio, question_text, progress_text, interview_state]
+    )
+
+    user_audio_input.change(
+        transcribe_audio,
+        inputs=[user_audio_input],
+        outputs=[transcript_display]
+    )
+
+    submit_answer_btn.click(
+        submit_answer,
+        inputs=[transcript_display, interview_state],
+        outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, interview_summary, interview_state]
+    )
+
+    next_question_btn.click(
+        next_question,
+        inputs=[interview_state],
+        outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, user_audio_input, transcript_display, interview_state]
+    )
 
+if __name__ == "__main__":
+    demo.launch(debug=True)
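The new interface swaps between the setup column and the interview column by returning gr.update(visible=...) values from the button callbacks, with the outputs list of each .click() matched one-to-one against the returned tuple. As a minimal standalone sketch of that show/hide pattern (all component and function names here are illustrative, not taken from app.py):

import gradio as gr

# Sketch of the Column visibility toggle used in the commit; names are invented.
with gr.Blocks() as sketch:
    with gr.Column(visible=True) as setup_col:
        name_box = gr.Textbox(label="Name")
        begin_btn = gr.Button("Begin")

    with gr.Column(visible=False) as session_col:
        greeting = gr.Markdown()

    def begin(name):
        # Hide the setup column, reveal the session column, fill the Markdown.
        return (
            gr.update(visible=False),      # setup_col
            gr.update(visible=True),       # session_col
            f"Hello, {name or 'there'}!",  # greeting
        )

    begin_btn.click(begin, inputs=[name_box], outputs=[setup_col, session_col, greeting])

if __name__ == "__main__":
    sketch.launch()

Because the callback's return order must line up with the outputs list, the handlers in this commit annotate every returned gr.update with the component it targets, which keeps the long tuples in start_interview, submit_answer, and next_question readable.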