Update app.py
app.py
CHANGED
@@ -1740,45 +1740,72 @@ import json
 from transformers import BarkModel, AutoProcessor
 import torch, gc
 import whisper
+import asyncio
+import threading
+from concurrent.futures import ThreadPoolExecutor
 
 print(torch.cuda.is_available())
 torch.cuda.empty_cache()
 gc.collect()
 
-… (32 removed lines, collapsed in this view; the surviving fragments "#", "def", ")" suggest the old eager model setup and a synchronous bark_tts helper)
+# Global variables for lazy loading
+model_bark = None
+processor_bark = None
+whisper_model = None
+bark_voice_preset = "v2/en_speaker_9"
+
+# Thread pool for async operations
+executor = ThreadPoolExecutor(max_workers=2)
+
+def load_models_lazy():
+    """Load models only when needed"""
+    global model_bark, processor_bark, whisper_model
+
+    if model_bark is None:
+        print("🔄 Loading Bark model...")
+        model_bark = BarkModel.from_pretrained("suno/bark").to("cuda" if torch.cuda.is_available() else "cpu")
+        print("✅ Bark model loaded")
+
+    if processor_bark is None:
+        print("🔄 Loading Bark processor...")
+        processor_bark = AutoProcessor.from_pretrained("suno/bark")
+        print("✅ Bark processor loaded")
+
+    if whisper_model is None:
+        print("🔄 Loading Whisper model...")
+        whisper_model = whisper.load_model("base", device="cuda")
+        print("✅ Whisper model loaded")
+
+def bark_tts_async(text):
+    """Async TTS generation"""
+    def _generate():
+        load_models_lazy()  # Load only when needed
+        print(f"🔄 Synthesizing TTS for: {text}")
+        inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
+        input_ids = inputs["input_ids"].to(model_bark.device)
+        start = time.time()
+        with torch.no_grad():
+            speech_values = model_bark.generate(
+                input_ids=input_ids,
+                do_sample=True,
+                fine_temperature=0.4,
+                coarse_temperature=0.8
+            )
+        print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
+        speech = speech_values.cpu().numpy().squeeze()
+        speech = (speech * 32767).astype(np.int16)
+        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        wavfile.write(temp_wav.name, 22050, speech)
+        return temp_wav.name
+
+    return executor.submit(_generate)
+
 def whisper_stt(audio_path):
-… (1 removed line, collapsed in this view)
+    """Lazy loading whisper STT"""
+    if not audio_path or not os.path.exists(audio_path):
+        return ""
+
+    load_models_lazy()  # Load only when needed
     result = whisper_model.transcribe(audio_path)
     return result["text"]
 
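The hunk above replaces eager startup loading with lazy singletons plus a two-worker ThreadPoolExecutor, so bark_tts_async returns a concurrent.futures.Future immediately instead of blocking on Bark inference. A minimal standalone sketch of that pattern (the sleeps and names below are stand-ins, not the app's real Bark/Whisper calls):

from concurrent.futures import ThreadPoolExecutor
import time

_model = None                       # like model_bark: stays empty until first use
executor = ThreadPoolExecutor(max_workers=2)

def load_model_lazy():
    """Singleton guard: pay the load cost once, on the first call."""
    global _model
    if _model is None:
        time.sleep(0.5)             # stand-in for BarkModel.from_pretrained(...)
        _model = object()

def synthesize_async(text):
    """Submit the slow work to the pool and return a Future right away."""
    def _generate():
        load_model_lazy()           # first call loads; later calls are free
        time.sleep(0.5)             # stand-in for model.generate(...)
        return f"audio for: {text}"
    return executor.submit(_generate)

future = synthesize_async("Hello candidate")
print(future.done())                # almost certainly False right after submit
print(future.result())              # blocks until the worker thread finishes

With max_workers=2, at most two submitted jobs run at once; anything else queues inside the executor. Note the asymmetry in the diff itself: Bark falls back to CPU, but whisper.load_model is pinned to device="cuda", so the lazy path still assumes a GPU for STT.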
@@ -1790,6 +1817,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     user_data = gr.State({})
     interview_state = gr.State({})
     missing_fields_state = gr.State([])
+    tts_future = gr.State(None)  # Store async TTS future
 
     with gr.Column(visible=True) as user_info_section:
         gr.Markdown("## Candidate Information")
@@ -1808,6 +1836,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Column(visible=False) as interview_pre_section:
         pre_interview_greeting_md = gr.Markdown()
         start_interview_final_btn = gr.Button("Start Interview")
+        loading_status = gr.Markdown("", visible=False)
 
     with gr.Column(visible=False) as interview_section:
         gr.Markdown("## Interview in Progress")
@@ -1821,6 +1850,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     def validate_start_btn(cv_file, job_desc):
         return gr.update(interactive=(cv_file is not None and hasattr(cv_file, "name") and bool(job_desc and job_desc.strip())))
+
     cv_file.change(validate_start_btn, [cv_file, job_desc], start_btn)
     job_desc.change(validate_start_btn, [cv_file, job_desc], start_btn)
 
@@ -1839,17 +1869,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         else:
             greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' when ready."
         return data, missing, gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, value=greeting)
+
     start_btn.click(process_and_route_initial, [cv_file, job_desc], [user_data, missing_fields_state, user_info_section, missing_section, pre_interview_greeting_md])
 
     def show_missing(missing):
         if missing is None: missing = []
         return gr.update(visible="name" in missing), gr.update(visible="job_role" in missing), gr.update(visible="seniority" in missing), gr.update(visible="skills" in missing)
+
     missing_fields_state.change(show_missing, missing_fields_state, [name_in, role_in, seniority_in, skills_in])
 
     def validate_fields(name, role, seniority, skills, missing):
         if not missing: return gr.update(interactive=False)
         all_filled = all([(not ("name" in missing) or bool(name.strip())), (not ("job_role" in missing) or bool(role.strip())), (not ("seniority" in missing) or bool(seniority)), (not ("skills" in missing) or bool(skills.strip()))])
         return gr.update(interactive=all_filled)
+
     for inp in [name_in, role_in, seniority_in, skills_in]:
         inp.change(validate_fields, [name_in, role_in, seniority_in, skills_in, missing_fields_state], submit_btn)
 
@@ -1860,14 +1893,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         if not data["skills"]: data["skills"] = [s.strip() for s in skills.split(",")]
         greeting = f"Hello {data['name']}, your profile is ready. Click 'Start Interview' to begin."
         return data, gr.update(visible=False), gr.update(visible=True), gr.update(value=greeting)
+
     submit_btn.click(complete_manual, [user_data, name_in, role_in, seniority_in, skills_in], [user_data, missing_section, interview_pre_section, pre_interview_greeting_md])
 
-    def … (old definition line; signature collapsed in this view)
+    def start_interview_immediate(data):
+        """Start interview immediately, begin TTS generation in background"""
         state = {
             "questions": [], "answers": [], "timings": [], "question_evaluations": [], "answer_evaluations": [],
             "conversation_history": [], "difficulty_adjustment": None, "question_idx": 0, "max_questions": 3,
             "q_start_time": time.time(), "log": []
         }
+
+        # Generate question text first (fast)
         context = ""
         prompt = build_interview_prompt(
             conversation_history=[], user_response="", context=context, job_role=data["job_role"],
@@ -1875,16 +1912,50 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         )
         first_q = groq_llm.predict(prompt)
         q_eval = {"Score": "N/A", "Reasoning": "Skipped to reduce processing time", "Improvements": []}
+
         state["questions"].append(first_q)
         state["question_evaluations"].append(q_eval)
         state["conversation_history"].append({'role': 'Interviewer', 'content': first_q})
-        audio_path = bark_tts(first_q)
         state["log"].append({"type": "question", "question": first_q, "question_eval": q_eval, "timestamp": time.time()})
-… (2 removed lines, collapsed in this view)
+
+        # Start TTS generation in background
+        tts_future_obj = bark_tts_async(first_q)
+
+        # Return immediately with loading message
+        return (state, tts_future_obj,
+                gr.update(visible=False),
+                gr.update(visible=True),
+                gr.update(visible=True, value="🔄 Generating audio..."),
+                gr.update(value=None),
+                f"*Question 1:* {first_q}")
+
+    def check_tts_ready(state, tts_future_obj):
+        """Check if TTS is ready and update audio"""
+        if tts_future_obj and tts_future_obj.done():
+            try:
+                audio_path = tts_future_obj.result()
+                return gr.update(value=audio_path), gr.update(visible=False), None
+            except Exception as e:
+                print(f"TTS Error: {e}")
+                return gr.update(value=None), gr.update(value=f"Error generating audio: {e}"), None
+        else:
+            return gr.update(), gr.update(), tts_future_obj
+
+    start_interview_final_btn.click(
+        start_interview_immediate,
+        [user_data],
+        [interview_state, tts_future, interview_pre_section, interview_section, loading_status, question_audio, question_text]
+    ).then(
+        # Check TTS status every 500ms
+        check_tts_ready,
+        [interview_state, tts_future],
+        [question_audio, loading_status, tts_future],
+        every=0.5
+    )
 
     def transcribe(audio_path):
         return whisper_stt(audio_path)
+
     user_audio_input.change(transcribe, user_audio_input, stt_transcript)
 
     def process_answer(transcript, audio_path, state, data):
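Here start_interview_immediate hands the Future to the tts_future state, and check_tts_ready polls it twice a second via .then(..., every=0.5). Two caveats: a Future kept in gr.State is only meaningful inside a single server process, and whether .then() accepts every depends on the Gradio version (newer releases drive this kind of refresh with gr.Timer). The poll itself reduces to plain Future inspection; a standalone sketch mirroring check_tts_ready's three outcomes, with a short sleep standing in for the 500 ms tick:

from concurrent.futures import ThreadPoolExecutor
import time

def poll_future(future):
    """One poll tick: idle / pending / ready-or-error, like check_tts_ready."""
    if future is None:
        return "idle", None, None
    if not future.done():
        return "pending", None, future          # keep the Future for the next tick
    try:
        return "ready", future.result(), None   # result() is instant once done() is True
    except Exception as e:                      # exceptions raised in the worker surface here
        return "error: " + str(e), None, None

executor = ThreadPoolExecutor(max_workers=1)
f = executor.submit(lambda: (time.sleep(0.2), "question.wav")[1])  # stand-in for bark_tts_async(...)
while True:
    status, value, f = poll_future(f)
    if status != "pending":
        print(status, value)
        break
    time.sleep(0.05)                            # stand-in for the every=0.5 interval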
@@ -1948,7 +2019,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         state["question_evaluations"].append(q_eval)
         state["conversation_history"].append({'role': 'Interviewer', 'content': next_q})
         state["log"].append({"type": "question", "question": next_q, "question_eval": q_eval, "timestamp": time.time()})
-        audio_path = bark_tts(next_q)
+
+        # Generate TTS asynchronously for next question too
+        audio_future = bark_tts_async(next_q)
+        # For now, we'll wait for it (you can make this async too)
+        audio_path = audio_future.result()
+
         eval_md = f"*Last Answer Eval:* {answer_eval}"
         return state, gr.update(visible=False), audio_path, f"*Question {qidx + 1}:* {next_q}", gr.update(value=None), gr.update(value=None), gr.update(visible=True, value=eval_md)
 
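For follow-up questions, process_answer still blocks on audio_future.result(), so the async wrapper buys nothing there yet (the comment in the hunk concedes as much), and the handler can stall indefinitely if Bark misbehaves. Future.result accepts a timeout; a sketch of bounding that wait, assuming a 60-second budget and an audio-less fallback are acceptable here:

from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
import time

executor = ThreadPoolExecutor(max_workers=1)
audio_future = executor.submit(lambda: (time.sleep(0.1), "question.wav")[1])  # stand-in for bark_tts_async(next_q)

try:
    audio_path = audio_future.result(timeout=60)  # wait at most 60 s instead of forever
except FutureTimeoutError:
    audio_path = None                             # fall back: show the question without audio
print(audio_path)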
@@ -1962,4 +2038,3 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 demo.launch(debug=True)
 
-