Update app.py
app.py CHANGED
@@ -1857,26 +1857,35 @@ def extract_candidate_details(file_path):
 # )
 
 # demo.launch(debug=True)
-
 import gradio as gr
 import time
 import tempfile
 import numpy as np
 import scipy.io.wavfile as wavfile
 import os
+import json
 import torch
 import whisper
 from transformers import BarkModel, AutoProcessor
 
-#
-
+# Clear GPU memory
+torch.cuda.empty_cache()
+
+# Load only essential models
+print("🔁 Loading Bark model...")
+model_bark = BarkModel.from_pretrained("suno/bark")
 processor_bark = AutoProcessor.from_pretrained("suno/bark")
+model_bark.to("cuda" if torch.cuda.is_available() else "cpu")
 bark_voice_preset = "v2/en_speaker_6"
+print("✅ Bark model loaded")
 
-
+print("🔁 Loading Whisper model...")
 whisper_model = whisper.load_model("base", device="cuda" if torch.cuda.is_available() else "cpu")
+print("✅ Whisper model loaded")
 
 def bark_tts(text):
+    """Convert text to speech using Bark"""
+    print(f"🔁 Synthesizing TTS for: {text}")
     inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
     inputs = {k: v.to(model_bark.device) for k, v in inputs.items()}
     speech_values = model_bark.generate(**inputs)
@@ -1887,33 +1896,223 @@ def bark_tts(text):
     return temp_wav.name
 
 def whisper_stt(audio_path):
+    """Convert speech to text using Whisper"""
     if not audio_path or not os.path.exists(audio_path):
         return ""
     result = whisper_model.transcribe(audio_path)
     return result["text"]
 
-#
-
-
-
-
-    transcript = whisper_stt(audio_path)
-    state["conversation"].append({"role": "Candidate", "content": transcript})
-
-    prompt = "\n".join([f"{turn['role']}: {turn['content']}" for turn in state["conversation"]])
-    next_q = groq_llm_predict(prompt)
-    state["conversation"].append({"role": "Interviewer", "content": next_q})
-
-    audio_out = bark_tts(next_q)
-    return state, audio_out, transcript
-
-with gr.Blocks() as demo:
-    state = gr.State({"conversation": []})
-    question_audio = gr.Audio(label="Interviewer's Question", interactive=False, autoplay=True)
-    user_audio_input = gr.Audio(source="microphone", type="filepath", label="Your Answer")
-    transcript_box = gr.Textbox(label="Transcript", interactive=False)
-
-    user_audio_input.change(interview_loop, [state, user_audio_input], [state, question_audio, transcript_box])
+# Simple predefined questions for quick interview
+INTERVIEW_QUESTIONS = [
+    "Tell me about yourself and your background.",
+    "What are your greatest strengths and how do they relate to this role?"
+]
 
-
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # State variables
+    interview_state = gr.State({"current_question": 0, "answers": [], "started": False})
+
+    # UI Layout
+    with gr.Column(visible=True) as start_section:
+        gr.Markdown("## Quick Interview Setup")
+        name_input = gr.Textbox(label="Your Name", placeholder="Enter your name")
+        role_input = gr.Textbox(label="Job Role", placeholder="e.g., Software Engineer")
+        start_btn = gr.Button("Start Interview", variant="primary")
+
+    with gr.Column(visible=False) as interview_section:
+        gr.Markdown("## Interview in Progress")
+        question_audio = gr.Audio(label="Listen to Question", interactive=False, autoplay=True)
+        question_text = gr.Markdown()
+
+        # Audio input for user response
+        user_audio_input = gr.Audio(
+            sources=["microphone"],
+            type="filepath",
+            label="Record Your Answer"
+        )
+
+        # Transcription display
+        transcript_display = gr.Textbox(
+            label="Your Answer (Transcribed)",
+            interactive=False
+        )
+
+        # Control buttons
+        with gr.Row():
+            submit_answer_btn = gr.Button("Submit Answer", variant="primary")
+            next_question_btn = gr.Button("Next Question", visible=False)
+
+        # Progress indicator
+        progress_text = gr.Markdown()
+
+        # Interview summary
+        interview_summary = gr.Markdown(visible=False)
+
+    # Event handlers
+    def start_interview(name, role):
+        """Initialize interview with first question"""
+        if not name.strip() or not role.strip():
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Please fill in both name and role fields."),
+                {"current_question": 0, "answers": [], "started": False, "name": "", "role": ""}
+            )
+
+        # Generate first question audio
+        question_text = INTERVIEW_QUESTIONS[0]
+        audio_path = bark_tts(question_text)
+
+        state = {
+            "current_question": 0,
+            "answers": [],
+            "started": True,
+            "name": name,
+            "role": role
+        }
+
+        return (
+            gr.update(visible=False),  # start_section
+            gr.update(visible=True),  # interview_section
+            audio_path,  # question_audio
+            f"**Question 1:** {question_text}",  # question_text
+            gr.update(value=f"Question 1 of {len(INTERVIEW_QUESTIONS)}"),  # progress_text
+            state
+        )
+
+    def transcribe_audio(audio_path):
+        """Transcribe user's audio input"""
+        if not audio_path:
+            return ""
+        return whisper_stt(audio_path)
+
+    def submit_answer(transcript, state):
+        """Process submitted answer and prepare for next question"""
+        if not transcript.strip():
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Please record an answer before submitting."),
+                gr.update(),
+                state
+            )
+
+        # Store the answer
+        state["answers"].append({
+            "question": INTERVIEW_QUESTIONS[state["current_question"]],
+            "answer": transcript,
+            "timestamp": time.time()
+        })
+
+        current_q = state["current_question"]
+
+        # Check if this was the last question
+        if current_q >= len(INTERVIEW_QUESTIONS) - 1:
+            # Interview complete
+            summary = generate_interview_summary(state)
+            return (
+                gr.update(visible=False),  # submit_answer_btn
+                gr.update(visible=False),  # next_question_btn
+                gr.update(value=None),  # question_audio
+                gr.update(value="**Interview Complete!**"),  # question_text
+                gr.update(value="Interview finished. Thank you!"),  # progress_text
+                gr.update(visible=True, value=summary),  # interview_summary
+                state
+            )
+        else:
+            # Show next question button
+            return (
+                gr.update(visible=False),  # submit_answer_btn
+                gr.update(visible=True),  # next_question_btn
+                gr.update(),  # question_audio
+                gr.update(),  # question_text
+                gr.update(value=f"Answer submitted! Ready for question {current_q + 2}?"),  # progress_text
+                gr.update(),  # interview_summary
+                state
+            )
+
+    def next_question(state):
+        """Move to next question"""
+        state["current_question"] += 1
+        current_q = state["current_question"]
+
+        if current_q < len(INTERVIEW_QUESTIONS):
+            question_text = INTERVIEW_QUESTIONS[current_q]
+            audio_path = bark_tts(question_text)
+
+            return (
+                gr.update(visible=True),  # submit_answer_btn
+                gr.update(visible=False),  # next_question_btn
+                audio_path,  # question_audio
+                f"**Question {current_q + 1}:** {question_text}",  # question_text
+                gr.update(value=f"Question {current_q + 1} of {len(INTERVIEW_QUESTIONS)}"),  # progress_text
+                gr.update(value=None),  # user_audio_input (clear)
+                gr.update(value=""),  # transcript_display (clear)
+                state
+            )
+        else:
+            # This shouldn't happen, but handle gracefully
+            return (
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(value="Interview Complete!"),
+                gr.update(value="All questions answered."),
+                gr.update(),
+                gr.update(),
+                state
+            )
+
+    def generate_interview_summary(state):
+        """Generate final interview summary"""
+        summary = f"# Interview Summary\n\n"
+        summary += f"**Candidate:** {state['name']}\n"
+        summary += f"**Role:** {state['role']}\n"
+        summary += f"**Date:** {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
+
+        for i, qa in enumerate(state['answers']):
+            summary += f"### Question {i + 1}\n"
+            summary += f"**Q:** {qa['question']}\n\n"
+            summary += f"**A:** {qa['answer']}\n\n"
+            summary += "---\n\n"
+
+        # Save to file
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        log_file = f"interview_log_{timestamp}.json"
+        with open(log_file, "w", encoding="utf-8") as f:
+            json.dump(state, f, indent=2, ensure_ascii=False)
+
+        summary += f"*Interview log saved as {log_file}*"
+        return summary
+
+    # Wire up events
+    start_btn.click(
+        start_interview,
+        inputs=[name_input, role_input],
+        outputs=[start_section, interview_section, question_audio, question_text, progress_text, interview_state]
+    )
+
+    user_audio_input.change(
+        transcribe_audio,
+        inputs=[user_audio_input],
+        outputs=[transcript_display]
+    )
+
+    submit_answer_btn.click(
+        submit_answer,
+        inputs=[transcript_display, interview_state],
+        outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, interview_summary, interview_state]
+    )
+
+    next_question_btn.click(
+        next_question,
+        inputs=[interview_state],
+        outputs=[submit_answer_btn, next_question_btn, question_audio, question_text, progress_text, user_audio_input, transcript_display, interview_state]
+    )
 
+if __name__ == "__main__":
+    demo.launch(debug=True)