File size: 3,765 Bytes
e65d0ad
8206a45
 
e65d0ad
8206a45
 
3d71f2e
8206a45
 
 
4441c50
 
8206a45
 
 
e65d0ad
b13d31f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8206a45
 
3c44ee8
35d31e1
 
 
 
 
 
 
3c44ee8
35d31e1
 
3c44ee8
35d31e1
903f0f8
8206a45
903f0f8
b72b033
8206a45
 
 
 
b13d31f
 
8206a45
4980b54
0bae633
8206a45
4980b54
b72b033
b13d31f
8206a45
e65d0ad
8206a45
e65d0ad
8206a45
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

# Module-level cache slots for the tokenizer and the model; both start out
# as None, and load_model() assigns them on first use.
tokenizer = None
model = None

def load_model():
    """Return the cached ``(tokenizer, model)`` pair, loading it if needed.

    The pair is kept in the module-level ``tokenizer`` and ``model`` globals.
    When either of them is still ``None``, both are (re)loaded with
    ``from_pretrained`` from the DeepSeek Coder instruct checkpoint and stored
    back into the globals before being returned.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    global tokenizer, model
    already_loaded = tokenizer is not None and model is not None
    if not already_loaded:
        # DeepSeek instruct checkpoint used for code evaluation.
        checkpoint = "deepseek-ai/deepseek-coder-1.3b-instruct"
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return tokenizer, model

def _first_rating_object(text):
    """Return the first JSON object in *text* with "stars" and "feedback" keys, else None."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one complete JSON value starting at `start`
            # and ignores whatever follows it, so braces inside string values
            # and nested objects are handled by the real JSON grammar instead
            # of a "first closing brace" heuristic.
            candidate, _ = decoder.raw_decode(text, start)
        except (ValueError, RecursionError):
            candidate = None
        if isinstance(candidate, dict) and "stars" in candidate and "feedback" in candidate:
            return candidate
        # Not a usable object here; retry from the next opening brace (this
        # also reaches objects nested inside a wrapper that lacked the keys).
        start = text.find("{", start + 1)
    return None


def extract_json(response_text):
    """Extract the rating object from raw model output.

    Looks for a JSON object that contains both a ``"stars"`` and a
    ``"feedback"`` key. The text after the last ``"Your response:"`` marker is
    searched first, so that an object appearing earlier in the text (for
    example inside an echoed prompt) cannot take precedence over the answer
    that follows the marker. If nothing is found there, or the marker is
    absent, the whole text is searched.

    Args:
        response_text: Text to search. Empty or ``None`` input yields the
            fallback result.

    Returns:
        dict: The first matching parsed object, or
        ``{"stars": 0, "feedback": "Evaluation failed. ..."}`` when no valid
        object can be extracted.
    """
    fallback = {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}
    if not response_text:
        return fallback

    _, marker, answer_part = response_text.rpartition("Your response:")
    sections = (answer_part, response_text) if marker else (response_text,)
    for section in sections:
        rating = _first_rating_object(section)
        if rating is not None:
            return rating

    # If all methods fail, return a fallback result.
    return fallback

def evaluate_code(question, code):
    """Ask the language model to rate a solution and return its verdict.

    Builds an instruction prompt around ``question`` and ``code``, generates
    up to 120 new tokens with the model from ``load_model()``, and parses the
    generated text with ``extract_json``. The raw generated text is printed
    to stdout for debugging.

    Args:
        question: Problem statement; interpolated into the prompt as-is.
        code: Candidate solution; interpolated into the prompt as-is.

    Returns:
        dict: Whatever ``extract_json`` returns for the generated text — an
        object with ``"stars"`` and ``"feedback"`` keys.
    """
    prompt = f"""You are an expert code evaluator.
Evaluate the following solution for the given problem.
Rate the solution as follows:
  - 5 stars: Perfect solution; it is correct, efficient, and follows best practices.
  - 4 stars: Correct solution with minor issues or improvements possible.
  - 3 stars: Partially correct solution with noticeable issues.
  - 2 stars: Incorrect solution with some correct elements.
  - 1 star: Mostly incorrect solution.
  - 0 stars: Completely incorrect solution.
Respond with exactly one JSON object (with no extra text) that has exactly two keys:
  "stars": an integer between 0 and 5,
  "feedback": a concise string message explaining your rating.
The JSON must start with '{{' and end with '}}'.
Do not output any additional text.
Question: "{question}"
Solution: "{code}"
Your response:"""

    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,      # Allow enough tokens for a complete response
        temperature=0.2,         # Low temperature keeps sampling close to greedy
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True
    )
    # For a causal LM, generate() returns the prompt tokens followed by the
    # newly generated ones. Decode only the new tokens: otherwise the echoed
    # prompt (which embeds the user-supplied question and code) is parsed
    # together with the answer, and a JSON object inside the submission could
    # be mistaken for the model's rating.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    response_text = tokenizer.decode(completion_ids, skip_special_tokens=True)
    print("Raw model response:", response_text)  # Debug output

    return extract_json(response_text)

# Command-line entry point: <script> "<question>" "<code>".
# Prints the evaluation result as a JSON object; when fewer than two
# arguments are given, prints a JSON error object and exits with status 1.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)
    verdict = evaluate_code(cli_args[0], cli_args[1])
    print(json.dumps(verdict))