Commit 324d83a · Added grader
Parent: fb03edc

Files changed:
- tests/candidate.py +2 -2
- tests/grader.py +43 -0
- tests/test_e2e.py +8 -2
- tests/{tessting_prompts.py → testing_prompts.py} +0 -0 (renamed)
tests/candidate.py
CHANGED
@@ -13,13 +13,13 @@ from api.llm import LLMManager
 from config import config
 from resources.data import fixed_messages, topic_lists
 from resources.prompts import prompts
-from tests.tessting_prompts import candidate_prompt
+from tests.testing_prompts import candidate_prompt
 
 load_dotenv()
 
 
 def complete_interview(interview_type, exp_name, requirements="", difficulty="", topic="", model="gpt-3.5-turbo"):
-    client = OpenAI()
+    client = OpenAI(base_url="https://api.openai.com/v1")
     llm = LLMManager(config, prompts)
     llm_name = config.llm.name
 
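The candidate change pins the OpenAI endpoint explicitly instead of relying on the SDK default. A minimal standalone sketch of the same client construction, assuming the openai v1 Python SDK and an OPENAI_API_KEY supplied via the environment or a local .env file (the connectivity check at the end is illustrative, not part of the repository):

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()  # pick up OPENAI_API_KEY from a local .env file, if present

# base_url pins the endpoint; the API key is read from OPENAI_API_KEY by default.
client = OpenAI(base_url="https://api.openai.com/v1")

# Illustrative connectivity check: print the id of the first available model.
print(client.models.list().data[0].id)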
tests/grader.py
ADDED
@@ -0,0 +1,43 @@
+import json
+
+from openai import OpenAI
+
+from tests.testing_prompts import grader_prompt
+
+
+def grade(json_file_path, model="gpt-4-turbo"):
+    client = OpenAI(base_url="https://api.openai.com/v1")
+
+    with open(json_file_path) as file:
+        interview_data = json.load(file)
+
+    messages = [
+        {"role": "system", "content": grader_prompt},
+        {"role": "user", "content": f"Interview data: {interview_data}"},
+        {"role": "user", "content": "Please evaluate the interview."},
+    ]
+
+    response = client.chat.completions.create(model=model, messages=messages, temperature=1, response_format={"type": "json_object"})
+    feedback = json.loads(response.choices[0].message.content)
+
+    feedback["file_name"] = json_file_path
+    feedback["agent_llm"] = interview_data["interviewer_llm"]
+    feedback["candidate_llm"] = interview_data["candidate_llm"]
+    feedback["type"] = interview_data["inputs"]["interview_type"]
+    feedback["difficulty"] = interview_data["inputs"]["difficulty"]
+    feedback["topic"] = interview_data["inputs"]["topic"]
+    feedback["average_response_time_seconds"] = interview_data["average_response_time_seconds"]
+    feedback["number_of_messages"] = len(interview_data["transcript"])
+
+    scores = [
+        feedback[x]
+        for x in feedback
+        if (x.startswith("interviewer_") or x.startswith("feedback_") or x.startswith("problem_")) and feedback[x] is not None
+    ]
+    feedback["overall_score"] = sum(scores) / len(scores)
+
+    # save results to json file in the same folder as the interview data
+    with open(json_file_path.replace(".json", "_feedback.json"), "w") as file:
+        json.dump(feedback, file, indent=4)
+
+    return feedback
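For context, a hedged usage sketch of the new grader run outside pytest. It assumes complete_interview returns the path of the saved interview JSON as its first value (as the updated test uses it), that valid OpenAI credentials are configured, and that the experiment name "manual_run" is an arbitrary label:

from tests.candidate import complete_interview
from tests.grader import grade

# Run one synthetic interview, then grade the saved JSON record.
file_path, _ = complete_interview("coding", "manual_run", model="gpt-3.5-turbo")
feedback = grade(file_path, model="gpt-4-turbo")

# grade() also writes <record>_feedback.json next to the input file.
print(feedback["overall_score"], feedback["type"], feedback["agent_llm"])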
tests/test_e2e.py
CHANGED
@@ -1,6 +1,12 @@
 from tests.candidate import complete_interview
+from tests.grader import grade
 
 
 def test_complete_interview():
-
-
+    for _ in range(3):
+        file_path, _ = complete_interview("coding", "test", model="gpt-3.5-turbo")
+        feedback = grade(file_path, model="gpt-4-turbo")
+        assert feedback["overall_score"] > 0.5
+        if feedback["overall_score"] > 0.8:
+            return
+    assert False
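The updated test gives the non-deterministic end-to-end run up to three attempts: every attempt must score above 0.5, a score above 0.8 passes immediately, and if no attempt clears 0.8 the final assert False fails the test. A small sketch for invoking it programmatically, assuming pytest is installed and the required API keys are set:

import sys

import pytest

# Equivalent to running `pytest -v tests/test_e2e.py` from the repository root.
sys.exit(pytest.main(["-v", "tests/test_e2e.py"]))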
tests/{tessting_prompts.py → testing_prompts.py}
RENAMED
File without changes