"""Voice-enabled AI HR interview assistant.

Loads interview questions from a JSON file, walks the candidate through them
in a Gradio chat UI, uses an OpenAI chat model to react to each answer, and
uses OpenAI speech APIs for text-to-speech and audio transcription.
"""

import json
import os
import tempfile
import time
from collections import deque

import gradio as gr
from dotenv import load_dotenv
from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from openai import OpenAI

# Load environment variables
load_dotenv()


def _openai_client():
    """Build an OpenAI client from the OPENAI_API_KEY environment variable."""
    return OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


# Function to read questions from JSON
def read_questions_from_json(file_path):
    """Load the list of interview questions from a JSON file.

    Args:
        file_path: Path to a JSON file containing a non-empty list.

    Returns:
        The parsed list of questions.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file parses to something empty or not a list.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file '{file_path}' does not exist.")

    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        questions_list = json.load(f)

    # Callers index the result positionally, so anything but a non-empty
    # list would only fail later with a less helpful error.
    if not questions_list or not isinstance(questions_list, list):
        raise ValueError("The JSON file is empty or has invalid content.")

    return questions_list


# Function to convert text to speech
def convert_text_to_speech(text):
    """Synthesize ``text`` to an MP3 file with the OpenAI speech API.

    Args:
        text: The text to speak.

    Returns:
        Path of a temporary ``.mp3`` file, or ``None`` if ``text`` is blank or
        the conversion failed (the error is printed, not raised).
    """
    if not text or not text.strip():
        return None

    start_time = time.time()
    try:
        client = _openai_client()
        response = client.audio.speech.create(model="tts-1", voice="alloy", input=text)

        # Save the audio stream to a temporary file.
        # NOTE: delete=False because the UI reads the file after this function
        # returns; nothing in this file removes these files afterwards.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            for chunk in response.iter_bytes():
                tmp_file.write(chunk)
            temp_audio_path = tmp_file.name

        print(f"DEBUG - Text-to-speech conversion time: {time.time() - start_time:.2f} seconds")
        return temp_audio_path
    except Exception as e:
        print(f"Error during text-to-speech conversion: {e}")
        return None


# Function to transcribe audio
def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    Args:
        audio_file_path: Path of the recorded audio file.

    Returns:
        The transcribed text, or ``None`` if transcription failed (the error
        is printed, not raised).
    """
    start_time = time.time()
    try:
        client = _openai_client()
        with open(audio_file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
        print(f"DEBUG - Audio transcription time: {time.time() - start_time:.2f} seconds")
        return transcription.text
    except Exception as e:
        print(f"Error during audio transcription: {e}")
        return None


# Conduct interview and handle user input
def conduct_interview(questions, language="English", history_limit=5):
    """Create a stateful interview session.

    Args:
        questions: Sequence of question strings, asked in order.
        language: Language named in the interviewer's system prompt.
        history_limit: Number of most recent question/answer pairs included
            in each prompt sent to the chat model.

    Returns:
        A tuple ``(interview_step, initial_message, final_message)`` where
        ``interview_step(user_input, audio_input, history)`` processes one
        answer and returns ``(history, "", audio_path)``.

    Raises:
        RuntimeError: If OPENAI_API_KEY is not set.
    """
    start_time = time.time()
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        raise RuntimeError("OpenAI API key not found. Please add it to your .env file as OPENAI_API_KEY.")

    chat = ChatOpenAI(
        openai_api_key=openai_api_key,
        model="gpt-4o",
        temperature=0.7,
        max_tokens=750,
    )

    # Bounded so the prompt does not grow without limit over a long interview.
    conversation_history = deque(maxlen=history_limit)
    system_prompt = (f"You are Sarah, an empathetic HR interviewer conducting a technical interview in {language}. "
                     "Respond to user follow-up questions politely and concisely. If the user is confused, provide clear clarification.")

    current_question_index = 0
    is_interview_finished = False

    initial_message = ("👋 Hi there, I'm Sarah, your friendly AI HR assistant! "
                       "I'll guide you through a series of interview questions to learn more about you. "
                       "Take your time and answer each question thoughtfully.")
    final_message = "That wraps up our interview. Thank you so much for your responses—it's been great learning more about you!"

    print(f"DEBUG - conduct_interview setup time: {time.time() - start_time:.2f} seconds")

    def speak_turn(reply_text, follow_up_text):
        """Speak the reply and the follow-up as one clip (single TTS call)."""
        audio_path = convert_text_to_speech(f"{reply_text} {follow_up_text}")
        if audio_path is None:
            # The combined text may have been rejected (e.g. too long); the
            # follow-up is the part the candidate must hear to continue.
            audio_path = convert_text_to_speech(follow_up_text)
        return audio_path

    def interview_step(user_input, audio_input, history):
        """Process one candidate answer.

        Args:
            user_input: Typed answer (ignored when ``audio_input`` is given).
            audio_input: Path of a recorded answer, or a falsy value.
            history: Chat history as a list of ``{"role", "content"}`` dicts.

        Returns:
            ``(history, "", audio_path)``: the updated history, an empty
            string to clear the textbox, and the audio to play (or ``None``).
        """
        nonlocal current_question_index, is_interview_finished
        step_start_time = time.time()
        history = history if history is not None else []

        # If interview is finished, do nothing (checked first so no API call
        # is spent on input that will be ignored anyway).
        if is_interview_finished:
            return history, "", None

        # Transcribe audio input if provided
        if audio_input:
            user_input = transcribe_audio(audio_input)
            print("Transcription:", user_input)
            if user_input is None:
                # transcribe_audio signals failure with None; tell the user
                # instead of crashing on the string operations below.
                history.append({"role": "assistant",
                                "content": "Sorry, I couldn't process that audio. Please try again or type your answer."})
                return history, "", None

        normalized_input = (user_input or "").strip()
        if not normalized_input:
            # Nothing was typed or said: do not record a blank answer.
            return history, "", None

        if normalized_input.lower() in ("exit", "quit"):
            history.append({"role": "assistant", "content": "The interview has ended at your request. Thank you for your time!"})
            is_interview_finished = True
            return history, "", None

        question_text = questions[current_question_index]
        history_content = "\n".join(
            f"Q: {entry['question']}\nA: {entry['answer']}" for entry in conversation_history
        )
        combined_prompt = (f"{system_prompt}\n\nPrevious conversation history:\n{history_content}\n\n"
                           f"Current question: {question_text}\nUser's input: {user_input}\n\n"
                           "Respond in a warm and conversational way, offering natural follow-ups if needed.")

        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=combined_prompt),
        ]

        chat_start_time = time.time()
        response = chat.invoke(messages)
        print(f"DEBUG - Chat response time: {time.time() - chat_start_time:.2f} seconds")
        response_content = response.content.strip()

        conversation_history.append({"question": question_text, "answer": user_input})

        # Use the correct format for messages
        history.append({"role": "user", "content": user_input})
        history.append({"role": "assistant", "content": response_content})

        if current_question_index + 1 < len(questions):
            current_question_index += 1
            follow_up = f"Alright, let's move on. {questions[current_question_index]}"
        else:
            # No questions left: close the interview with the farewell.
            follow_up = final_message
            is_interview_finished = True

        history.append({"role": "assistant", "content": follow_up})
        audio_path = speak_turn(response_content, follow_up)

        print(f"DEBUG - Interview step time: {time.time() - step_start_time:.2f} seconds")
        return history, "", audio_path

    return interview_step, initial_message, final_message


# Gradio interface
def main():
    """Build and launch the Gradio interview UI.

    Any exception raised while loading questions, creating the session, or
    running the app is printed rather than propagated.
    """
    QUESTIONS_FILE_PATH = "questions.json"  # Ensure you have a questions.json file with your interview questions

    try:
        questions = read_questions_from_json(QUESTIONS_FILE_PATH)
        # NOTE: this session lives in a closure shared by every browser
        # session connected to the app; it is not per-user state.
        interview_func, initial_message, _ = conduct_interview(questions)

        css = (
            " .contain { display: flex; flex-direction: column; }"
            " .gradio-container { height: 100vh !important; }"
            " #component-0 { height: 100%; }"
            " .chatbot { flex-grow: 1; overflow: auto; height: 100px; }"
            " .chatbot .wrap.svelte-1275q59.wrap.svelte-1275q59 {flex-wrap : nowrap !important}"
            " .user > div > .message {background-color : #dcf8c6 !important}"
            " .bot > div > .message {background-color : #f7f7f8 !important} "
        )

        with gr.Blocks(css=css) as demo:
            gr.Markdown("""

👋 Welcome to Your AI HR Interview Assistant

""")
            start_btn = gr.Button("Start Interview", variant="primary")
            gr.Markdown("""

I will ask you a series of questions. Please answer honestly and thoughtfully. When you are ready, click "Start Interview" to begin.

""")
            chatbot = gr.Chatbot(label="Interview Chat", elem_id="chatbot", height=650, type='messages')
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Your Answer")
            user_input = gr.Textbox(label="Your Response", placeholder="Type your answer here or use the microphone...", lines=1)
            audio_output = gr.Audio(label="Response Audio", autoplay=True)

            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.Button("Clear Chat")

            def reset_session():
                """Replace the shared step function with a fresh session."""
                nonlocal interview_func
                interview_func, _, _ = conduct_interview(questions)

            def start_interview():
                """Reset the session and present the greeting plus question 1."""
                # The greeting announces the first question, so the session
                # must be back at question 0 even on a repeated click.
                reset_session()

                start_time = time.time()

                # Combine initial message and first question
                first_question = "Let's begin! Here's your first question: " + questions[0]
                combined_message = initial_message + " " + first_question

                # Convert combined message to speech
                combined_audio_path = convert_text_to_speech(combined_message)

                history = [{"role": "assistant", "content": combined_message}]
                print(f"DEBUG - Initial message audio time: {time.time() - start_time:.2f} seconds")

                return history, "", combined_audio_path

            def clear_interview():
                """Reset the interview state and empty the chat, textbox and audio."""
                reset_session()
                return [], "", None

            def interview_step_wrapper(user_response, audio_response, history):
                """Run one interview step with the current session."""
                history, _, audio_path = interview_func(user_response, audio_response, history)
                time.sleep(0.1)  # Reduced delay
                return history, "", audio_path

            def on_enter_submit(history, user_response):
                """Handle Enter in the textbox (text only, no audio)."""
                if not user_response.strip():
                    return history, "", None
                return interview_step_wrapper(user_response, None, history)

            audio_input.stop_recording(interview_step_wrapper, inputs=[user_input, audio_input, chatbot], outputs=[chatbot, user_input, audio_output])
            start_btn.click(start_interview, inputs=[], outputs=[chatbot, user_input, audio_output])
            submit_btn.click(interview_step_wrapper, inputs=[user_input, audio_input, chatbot], outputs=[chatbot, user_input, audio_output])
            user_input.submit(on_enter_submit, inputs=[chatbot, user_input], outputs=[chatbot, user_input, audio_output])
            clear_btn.click(clear_interview, inputs=[], outputs=[chatbot, user_input, audio_output])

        demo.launch()
    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()