File size: 2,526 Bytes
e65d0ad
8206a45
 
e65d0ad
8206a45
 
3d71f2e
8206a45
 
 
4441c50
 
8206a45
 
 
e65d0ad
8206a45
 
3c44ee8
 
8206a45
3c44ee8
 
b72b033
903f0f8
8206a45
903f0f8
b72b033
8206a45
 
 
 
903f0f8
b72b033
8206a45
b72b033
0bae633
8206a45
3c44ee8
b72b033
 
3c44ee8
b72b033
 
8206a45
b72b033
 
 
 
 
 
 
 
 
8206a45
 
e65d0ad
8206a45
e65d0ad
8206a45
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

# Module-level cache slots for the tokenizer and the model; both start out
# empty (None) and are rebound by load_model() the first time it runs.
tokenizer = None
model = None

def load_model():
    """Return the cached ``(tokenizer, model)`` pair, loading it if needed.

    If either module-level cache slot is still ``None``, both the tokenizer
    and the model are (re)loaded from the checkpoint named below and stored
    in the module globals before being returned.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    global tokenizer, model

    # Fast path: both objects are already cached.
    if tokenizer is not None and model is not None:
        return tokenizer, model

    # DeepSeek instruct checkpoint used for code evaluation.
    checkpoint = "deepseek-ai/deepseek-coder-1.3b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return tokenizer, model

def _extract_evaluation(text):
    """Return the first JSON object in *text* that has "stars" and "feedback".

    Every ``{`` is tried as the start of a JSON value using
    ``json.JSONDecoder.raw_decode``, so objects that span several lines,
    contain nested braces, or carry a ``}`` inside a string are parsed
    correctly. Returns ``None`` when no such object is found.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except ValueError:
            # Not valid JSON from this brace (json.JSONDecodeError is a
            # ValueError); fall through and try the next one.
            candidate = None
        if isinstance(candidate, dict) and "stars" in candidate and "feedback" in candidate:
            return candidate
        start = text.find("{", start + 1)
    return None


def evaluate_code(question, code):
    """Ask the language model to grade *code* as a solution to *question*.

    Args:
        question: Text of the problem statement, inserted into the prompt.
        code: Text of the submitted solution, inserted into the prompt.

    Returns:
        The first JSON object found in the model's reply that contains both
        a "stars" and a "feedback" key, as a dict. If no such object can be
        parsed, a fallback dict with ``"stars": 0`` and an explanatory
        "feedback" message is returned.
    """
    import sys

    prompt = f"""You are an expert code evaluator.
Evaluate the following solution for the given problem.
Respond with exactly one JSON object (with no extra text) that has exactly two keys:
  "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent),
  "feedback": a concise string message.
The JSON must start with '{{' and end with '}}'.
Do not output anything else.
Question: "{question}"
Solution: "{code}"
Your response:"""

    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,      # Allow enough tokens for a complete response
        temperature=0.2,         # Small randomness for creativity
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True            # Enable sampling to encourage generation
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # newly generated ones. Decode only the new tokens: otherwise JSON-looking
    # text inside `question` or `code` would be parsed as if it were the
    # model's verdict.
    prompt_length = inputs["input_ids"].shape[-1]
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Debug output goes to stderr so that stdout carries only the JSON result
    # when this module is run as a script.
    print("Raw model response:", response_text, file=sys.stderr)

    result = _extract_evaluation(response_text)
    if result is None:
        result = {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}

    return result

# Command-line entry point for manual testing; expects the question and the
# code as the first two positional arguments.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        # Report the usage error as JSON and exit with a non-zero status.
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)

    question, code = cli_args[0], cli_args[1]
    print(json.dumps(evaluate_code(question, code)))