DEADLOCK007X committed on
Commit
35d31e1
·
1 Parent(s): b72b033

Refine evaluation prompt and extraction for DeepSeek instruct model

Browse files
Files changed (1) hide show
  1. tinyllama_inference.py +16 -9
tinyllama_inference.py CHANGED
@@ -15,13 +15,21 @@ def load_model():
15
  return tokenizer, model
16
 
17
  def evaluate_code(question, code):
 
18
  prompt = f"""You are an expert code evaluator.
19
  Evaluate the following solution for the given problem.
 
 
 
 
 
 
 
20
  Respond with exactly one JSON object (with no extra text) that has exactly two keys:
21
- "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent),
22
- "feedback": a concise string message.
23
  The JSON must start with '{{' and end with '}}'.
24
- Do not output anything else.
25
  Question: "{question}"
26
  Solution: "{code}"
27
  Your response:"""
@@ -30,21 +38,20 @@ Your response:"""
30
  inputs = tokenizer(prompt, return_tensors="pt")
31
  outputs = model.generate(
32
  **inputs,
33
- max_new_tokens=100, # Allow enough tokens for a complete response
34
- temperature=0.2, # Small randomness for creativity
35
  pad_token_id=tokenizer.eos_token_id,
36
- do_sample=True # Enable sampling to encourage generation
37
  )
38
  response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
39
- print("Raw model response:", response_text) # Debug output
40
 
41
- # Use regex to extract all JSON objects (non-greedy)
42
  matches = re.findall(r'\{.*?\}', response_text)
43
  result = None
44
  for m in matches:
45
  try:
46
  temp = json.loads(m)
47
- # Check that the parsed JSON contains both expected keys
48
  if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
49
  result = temp
50
  break
 
15
  return tokenizer, model
16
 
17
  def evaluate_code(question, code):
18
+ # Refined prompt with explicit rating criteria.
19
  prompt = f"""You are an expert code evaluator.
20
  Evaluate the following solution for the given problem.
21
+ Rate the solution as follows:
22
+ - 5 stars: Perfect solution; it is correct, efficient, and follows best practices.
23
+ - 4 stars: Correct solution with minor issues or improvements possible.
24
+ - 3 stars: Partially correct solution with noticeable issues.
25
+ - 2 stars: Incorrect solution with some correct elements.
26
+ - 1 star: Mostly incorrect solution.
27
+ - 0 stars: Completely incorrect solution.
28
  Respond with exactly one JSON object (with no extra text) that has exactly two keys:
29
+ "stars": an integer between 0 and 5,
30
+ "feedback": a concise string message explaining your rating.
31
  The JSON must start with '{{' and end with '}}'.
32
+ Do not output any additional text.
33
  Question: "{question}"
34
  Solution: "{code}"
35
  Your response:"""
 
38
  inputs = tokenizer(prompt, return_tensors="pt")
39
  outputs = model.generate(
40
  **inputs,
41
+ max_new_tokens=120, # Increase token allowance for a complete evaluation
42
+ temperature=0.1, # A low temperature for more deterministic output
43
  pad_token_id=tokenizer.eos_token_id,
44
+ do_sample=True # Enable sampling to allow some creativity
45
  )
46
  response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
47
+ print("Raw model response:", response_text) # Debug: inspect raw output
48
 
49
+ # Extract all JSON objects using non-greedy regex and select the one with expected keys
50
  matches = re.findall(r'\{.*?\}', response_text)
51
  result = None
52
  for m in matches:
53
  try:
54
  temp = json.loads(m)
 
55
  if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
56
  result = temp
57
  break