Commit eee97a9
Parent: 1d5b387

Reduced tts delay

Files changed:
- api/audio.py +1 -5
- api/llm.py +9 -28
- resources/prompts.py +3 -1
- ui/coding.py +68 -5
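Summary: the TTS delay drops because speech synthesis now starts while the LLM reply is still streaming. get_text in api/llm.py yields raw deltas instead of re-yielding the accumulated text, the system prompt now asks the model to split its visible reply into '\n\n'-separated parts, and a new send_request generator in ui/coding.py feeds each completed part to TTS while it keeps pulling text, yielding interleaved chat updates and audio chunks. read_last_message in api/audio.py accordingly shrinks to reading only the final message.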
api/audio.py (CHANGED)

@@ -230,8 +230,4 @@ class TTSManager:
         :return: Generator yielding chunks of audio bytes.
         """
         if len(chat_history) > 0 and chat_history[-1][1]:
-            n = len(chat_history) - 1
-            while n >= 0 and chat_history[n][1]:
-                n -= 1
-            for i in range(n + 1, len(chat_history)):
-                yield from self.read_text(chat_history[i][1])
+            yield from self.read_text(chat_history[-1][1])
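The effect of this change in toy form (chat entries are [user, assistant] pairs; the initializer n = len(chat_history) - 1 in the deleted lines did not survive extraction and is reconstructed from the loop that follows it): the old code re-read every trailing non-empty message, while the new code reads only the final one, since earlier parts are now spoken while the reply streams in.

chat_history = [["Hi", "Hello!"], [None, "Part one."], [None, "Part two."]]

# old behavior: scan back past the trailing non-empty messages, then read them all
n = len(chat_history) - 1
while n >= 0 and chat_history[n][1]:
    n -= 1
old_selection = [m[1] for m in chat_history[n + 1:]]

# new behavior: read only the last message
new_selection = [chat_history[-1][1]]

print(old_selection)  # ['Hello!', 'Part one.', 'Part two.']
print(new_selection)  # ['Part two.']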
api/llm.py (CHANGED)

@@ -59,11 +59,9 @@ class LLMManager:
             response = self.client.chat.completions.create(
                 model=self.config.llm.name, messages=messages, temperature=1, stream=True, max_tokens=2000
             )
-            text = ""
             for chunk in response:
                 if chunk.choices[0].delta.content:
-                    text += chunk.choices[0].delta.content
-                    yield text
+                    yield chunk.choices[0].delta.content
         except Exception as e:
             raise APIError(f"LLM Get Text Error: Unexpected error: {e}")

@@ -109,7 +107,10 @@ class LLMManager:
         Get a problem from the LLM based on the given requirements, difficulty, and topic.
         """
         messages = self.get_problem_prepare_messages(requirements, difficulty, topic, interview_type)
-        yield from self.get_text(messages)
+        problem = ""
+        for text in self.get_text(messages):
+            problem += text
+            yield problem

     def update_chat_history(
         self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]

@@ -123,29 +124,6 @@ class LLMManager:
         chat_history.append({"role": "user", "content": message})
         return chat_history

-    def send_request(
-        self, code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]]
-    ) -> Generator[Tuple[List[Dict[str, str]], List[List[Optional[str]]], str], None, None]:
-        """
-        Send a request to the LLM and update the chat display.
-        """
-        chat_history = self.update_chat_history(code, previous_code, chat_history, chat_display)
-        original_len = len(chat_display)
-        chat_history.append({"role": "assistant", "content": ""})
-        reply = self.get_text(chat_history)
-        for message in reply:
-            chat_history[-1]["content"] = message
-            text_to_display = message.split("#NOTES#")[0].strip()
-            split_messages = text_to_display.split("\n\n")
-            chat_display = chat_display[:original_len]
-            for m in split_messages:
-                if m.strip():
-                    chat_display.append([None, m])
-            if len(chat_display) == original_len:
-                chat_display.append([None, ""])
-
-            yield chat_history, chat_display, code
-
     def end_interview_prepare_messages(
         self, problem_description: str, chat_history: List[Dict[str, str]], interview_type: str
     ) -> List[Dict[str, str]]:

@@ -171,4 +149,7 @@ class LLMManager:
             yield "No interview history available"
             return
         messages = self.end_interview_prepare_messages(problem_description, chat_history, interview_type)
-        yield from self.get_text(messages)
+        feedback = ""
+        for text in self.get_text(messages):
+            feedback += text
+            yield feedback
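The pivotal change is in get_text: it previously accumulated the streamed deltas and re-yielded the growing string on every chunk; now it yields each raw delta and leaves accumulation to callers such as get_problem and end_interview. (The deleted one-line bodies at old lines 112 and 174 did not survive the page extraction and are reconstructed above as yield from self.get_text(messages).) A minimal sketch of the two styles, with a plain list of chunks standing in for the OpenAI stream and stream_cumulative/stream_deltas as illustrative names:

from typing import Generator, Iterable

def stream_cumulative(deltas: Iterable[str]) -> Generator[str, None, None]:
    """Old style: accumulate and re-yield the whole text after every delta."""
    text = ""
    for delta in deltas:
        text += delta
        yield text

def stream_deltas(deltas: Iterable[str]) -> Generator[str, None, None]:
    """New style: pass each raw delta through; the consumer accumulates."""
    for delta in deltas:
        yield delta

chunks = ["Hel", "lo ", "world"]
print(list(stream_cumulative(chunks)))  # ['Hel', 'Hello ', 'Hello world']

problem = ""
for delta in stream_deltas(chunks):  # consumer-side accumulation, as
    problem += delta                 # get_problem and end_interview now do
print(problem)                       # Hello world

Yielding deltas lets a consumer such as send_request forward each fragment onward immediately, instead of waiting for, or re-scanning, the full accumulated string.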
resources/prompts.py (CHANGED)

@@ -27,7 +27,9 @@ You are an AI conducting an interview. Your role is to manage the interview effe
 - Make notes when you encounter: mistakes, bugs, incorrect statements, missed important aspects, any other observations.
 - There should be no other delimiters in your response. Only #NOTES# is a valid delimiter, everything else will be treated just like text.

-- …
+- Your visible messages will be read out loud to the candidate.
+- Use mostly plain text, avoid markdown and complex formatting, unless necessary avoid code and formulas in the visible messages.
+- Use '\n\n' to split your message in short logical parts, so it will be easier to read for the candidate.

 - You should direct the interview strictly rather than helping the candidate solve the problem.
 - Be very concise in your responses. Allow the candidate to lead the discussion, ensuring they speak more than you do.
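(The single bullet removed here survived only as its leading dash, so its text is shown as an ellipsis above.) The new rules mirror what the UI now does with a reply: everything after #NOTES# is kept out of the spoken text, and the visible part is split on blank lines so each short part can be displayed and synthesized as soon as it completes. A small consumer-side illustration with a toy reply string:

reply = "Good start.\n\nWhat is the time complexity?#NOTES#Missed the edge case."

visible = reply.split("#NOTES#")[0]
parts = [p for p in visible.split("\n\n") if p.strip()]
print(parts)  # ['Good start.', 'What is the time complexity?']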
ui/coding.py (CHANGED)

@@ -2,8 +2,70 @@ import gradio as gr
 import numpy as np
 import os

+from itertools import chain
+
 from resources.data import fixed_messages, topic_lists
 from utils.ui import add_candidate_message, add_interviewer_message
+from typing import List, Dict, Generator, Optional, Tuple
+from functools import partial
+
+
+def send_request(
+    code: str, previous_code: str, chat_history: List[Dict[str, str]], chat_display: List[List[Optional[str]]], llm, tts
+) -> Generator[Tuple[List[Dict[str, str]], List[List[Optional[str]]], str, bytes], None, None]:
+    """
+    Send a request to the LLM, update the chat display, and translate the reply to speech.
+    """
+    # TODO: Find a way to simplify this and remove duplication in the logic
+    chat_history = llm.update_chat_history(code, previous_code, chat_history, chat_display)
+    original_len = len(chat_display)
+    chat_display.append([None, ""])
+    chat_history.append({"role": "assistant", "content": ""})
+
+    text_chunks = []
+    reply = llm.get_text(chat_history)
+
+    audio_generator = iter(())
+    has_text_item = True
+    has_audion_item = True
+    audio_created = 0
+    is_notes = False
+
+    while has_text_item or has_audion_item:
+        try:
+            text_chunk = next(reply)
+            text_chunks.append(text_chunk)
+            has_text_item = True
+        except StopIteration:
+            has_text_item = False
+        chat_history[-1]["content"] = "".join(text_chunks)
+
+        try:
+            audio_chunk = next(audio_generator)
+            has_audion_item = True
+        except StopIteration:
+            audio_chunk = b""
+            has_audion_item = False
+
+        if has_text_item and not is_notes:
+            last_message = chat_display[-1][1]
+            last_message += text_chunk
+
+            split_notes = last_message.split("#NOTES#")
+            if len(split_notes) > 1:
+                is_notes = True
+                last_message = split_notes[0]
+            split_messages = last_message.split("\n\n")
+            chat_display[-1][1] = split_messages[0]
+            for m in split_messages[1:]:
+                chat_display.append([None, m])
+
+        if len(chat_display) - original_len > audio_created + has_text_item:
+            audio_generator = chain(audio_generator, tts.read_text(chat_display[original_len + audio_created][1]))
+            audio_created += 1
+            has_audion_item = True
+
+        yield chat_history, chat_display, code, audio_chunk


 def change_code_area(interview_type):

@@ -25,6 +87,8 @@ def change_code_area(interview_type):


 def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
+    send_request_partial = partial(send_request, llm=llm, tts=tts)
+
     with gr.Tab("Interview", render=False, elem_id=f"tab") as problem_tab:
         chat_history = gr.State([])
         previous_code = gr.State("")

@@ -169,11 +233,11 @@ def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
         send_btn.click(fn=add_candidate_message, inputs=[message, chat], outputs=[chat]).success(
             fn=lambda: None, outputs=[message]
         ).success(
-            fn=llm.send_request,
+            fn=send_request_partial,
             inputs=[code, previous_code, chat_history, chat],
-            outputs=[chat_history, chat, previous_code],
-        ).success(
-            fn=tts.read_last_message, inputs=[chat], outputs=[audio_output]
+            outputs=[chat_history, chat, previous_code, audio_output],
+            # ).success(
+            #     fn=tts.read_last_message, inputs=[chat], outputs=[audio_output]
         ).success(
             fn=lambda: np.array([], dtype=np.int16), outputs=[audio_buffer]
         ).success(

@@ -187,7 +251,6 @@ def get_problem_solving_ui(llm, tts, stt, default_audio_params, audio_output):
             outputs=[transcript, audio_buffer, message],
             show_progress="hidden",
         )
-        audio_input.stop_recording(fn=lambda: gr.update(interactive=True), outputs=[send_btn])
     else:
         audio_input.stop_recording(fn=stt.speech_to_text_full, inputs=[audio_input], outputs=[message]).success(
             fn=lambda: gr.update(interactive=True), outputs=[send_btn]