DEADLOCK007X committed on
Commit ec5407e · 1 Parent(s): 37475c4

Improve JSON extraction with fallback methods
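For context, the extraction logic touched by this commit is a regex pass over the decoded model output: every '{...}' block is collected, the candidates are checked in reverse order, and a zero-star default is returned when nothing parses into a dict with the expected keys. A minimal standalone sketch of that behaviour (the helper name and the sample strings are illustrative only, not part of the repository):

import json
import re

def extract_eval_json(response_text):
    # Collect all {...} blocks (non-greedy, DOTALL) and prefer the last parseable
    # one that carries both expected keys.
    for block in reversed(re.findall(r'\{.*?\}', response_text, re.DOTALL)):
        try:
            candidate = json.loads(block)
            if isinstance(candidate, dict) and "stars" in candidate and "feedback" in candidate:
                return candidate
        except json.JSONDecodeError:
            continue
    # Fallback default mirroring the one used in the file.
    return {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}

print(extract_eval_json('Rating: {"stars": 4, "feedback": "Correct, minor style issues."}'))
print(extract_eval_json("no JSON in this response"))  # falls back to the zero-star default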

Files changed (1)
  1. tinyllama_inference.py +34 -35
tinyllama_inference.py CHANGED
@@ -14,58 +14,57 @@ def load_model():
     model = AutoModelForCausalLM.from_pretrained(model_name)
     return tokenizer, model
 
-def extract_json(response_text):
-    # First attempt: Use regex (non-greedy, with DOTALL) to find JSON blocks
-    matches = re.findall(r'\{.*?\}', response_text, re.DOTALL)
-    # Check the matches in reverse order (last one might be the evaluation output)
-    for m in reversed(matches):
-        try:
-            temp = json.loads(m)
-            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
-                return temp
-        except Exception:
-            continue
-    # Fallback: try extracting JSON from each line that looks like a JSON object
-    json_lines = [line.strip() for line in response_text.splitlines() if line.strip().startswith('{') and line.strip().endswith('}')]
-    for line in reversed(json_lines):
-        try:
-            temp = json.loads(line)
-            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
-                return temp
-        except Exception:
-            continue
-    return {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}
-
 def evaluate_code(question, code):
+    # Revised prompt with explicit instructions about arithmetic correctness.
     prompt = f"""You are an expert code evaluator.
-For the following problem and solution, provide exactly one JSON object that contains your evaluation.
-The JSON object must have exactly two keys:
-"stars": an integer between 0 and 5, where 5 means a perfect solution and 0 means completely incorrect.
-"feedback": a concise explanation of your rating.
-Do not include any extra text, examples, or multiple responses.
-Only evaluate the code provided below.
+Evaluate the following solution for the given problem.
+The problem asks for a function that returns the square of a number.
+A correct solution must multiply the number by itself (using x*x or x**2).
+If the solution uses any other operation (such as addition), it is completely incorrect.
+Rate the solution as follows:
+- 5 stars: Perfect solution; the solution is correct, efficient, and follows best practices.
+- 4 stars: Correct solution with minor issues.
+- 3 stars: Partially correct solution with noticeable issues.
+- 2 stars: Incorrect solution with some correct elements.
+- 1 star: Mostly incorrect solution.
+- 0 stars: Completely incorrect solution.
+Respond with exactly one JSON object (with no extra text) that has exactly two keys:
+"stars": an integer between 0 and 5,
+"feedback": a concise string message explaining your rating.
+The JSON must start with '{{' and end with '}}'.
+Do not output any additional text.
 Question: "{question}"
 Solution: "{code}"
 Your response:"""
-    # ... rest of the code remains unchanged ...
-
 
     tokenizer, model = load_model()
     inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(
         **inputs,
-        max_new_tokens=120,  # Increase token allowance for a complete response
-        temperature=0.2,  # Low randomness for deterministic output
+        max_new_tokens=120,
+        temperature=0.2,
         pad_token_id=tokenizer.eos_token_id,
         do_sample=True
     )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Raw model response:", response_text)  # Debug: Inspect raw output
+    print("Raw model response:", response_text)  # Debug output
 
-    result = extract_json(response_text)
+    # Extract JSON: use regex to capture all JSON blocks and select one with expected keys
+    matches = re.findall(r'\{.*?\}', response_text, re.DOTALL)
+    result = None
+    for m in reversed(matches):
+        try:
+            temp = json.loads(m)
+            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
+                result = temp
+                break
+        except Exception:
+            continue
+    if result is None:
+        result = {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}
+
     return result
 
-# For direct command-line testing.
 if __name__ == "__main__":
     import sys
     if len(sys.argv) < 3:
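The hunk is cut off inside the __main__ guard. A plausible continuation is sketched below purely as an assumption about code outside this diff (the argument handling and usage message are guesses, not the repository's actual lines):

if __name__ == "__main__":
    import sys
    if len(sys.argv) < 3:
        # Assumed usage check; not part of this diff.
        print(json.dumps({"stars": 0, "feedback": "Usage: python tinyllama_inference.py <question> <code>"}))
        sys.exit(1)
    question, code = sys.argv[1], sys.argv[2]
    print(json.dumps(evaluate_code(question, code)))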