Spaces:
Running
Running
File size: 2,526 Bytes
e65d0ad 8206a45 e65d0ad 8206a45 3d71f2e 8206a45 4441c50 8206a45 e65d0ad 8206a45 3c44ee8 8206a45 3c44ee8 b72b033 903f0f8 8206a45 903f0f8 b72b033 8206a45 903f0f8 b72b033 8206a45 b72b033 0bae633 8206a45 3c44ee8 b72b033 3c44ee8 b72b033 8206a45 b72b033 8206a45 e65d0ad 8206a45 e65d0ad 8206a45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
# Module-level cache for the tokenizer and the model; each starts out as None.
tokenizer = None
model = None
def load_model():
    """Return the shared ``(tokenizer, model)`` pair, loading it on first use.

    The pair is cached in the module-level ``tokenizer`` and ``model``
    globals. When both are already set they are returned as-is; if either
    one is still ``None``, both are (re)loaded from the pretrained checkpoint
    and stored back into the globals.

    Returns:
        tuple: ``(tokenizer, model)`` as produced by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForCausalLM.from_pretrained``.
    """
    global tokenizer, model

    # Fast path: everything is already cached.
    if tokenizer is not None and model is not None:
        return tokenizer, model

    # Use the DeepSeek instruct model for code evaluation.
    checkpoint = "deepseek-ai/deepseek-coder-1.3b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return tokenizer, model
def _extract_evaluation(text):
    """Return the first JSON object in *text* holding "stars" and "feedback".

    Every ``{`` in *text* is tried as the start of a JSON value using
    ``json.JSONDecoder.raw_decode``, which parses a complete value and ignores
    whatever follows it. Unlike a ``\\{.*?\\}`` regex this copes with objects
    spread over several lines, nested objects, and ``}`` characters inside
    string values.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The first decoded ``dict`` that has both a ``"stars"`` and a
        ``"feedback"`` key, or ``None`` when there is no such object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass  # Not valid JSON at this brace; move on to the next one.
        else:
            if isinstance(candidate, dict) and "stars" in candidate and "feedback" in candidate:
                return candidate
        start = text.find("{", start + 1)
    return None


def evaluate_code(question, code):
    """Ask the language model to grade *code* as a solution to *question*.

    A prompt is built that requests a single JSON object with the keys
    ``"stars"`` and ``"feedback"``. The model's continuation is generated,
    and the first JSON object with both keys is extracted from it.

    Args:
        question: Problem statement; interpolated into the prompt verbatim.
        code: Candidate solution; interpolated into the prompt verbatim.

    Returns:
        dict: The parsed model verdict containing at least ``"stars"`` and
        ``"feedback"``. The values are passed through as the model produced
        them (they are not type- or range-checked). If no suitable JSON
        object is found, a fallback with ``"stars": 0`` and an
        "Evaluation failed" feedback message is returned.
    """
    # Function-scope import: the module itself only imports sys under __main__.
    import sys

    prompt = f"""You are an expert code evaluator.
Evaluate the following solution for the given problem.
Respond with exactly one JSON object (with no extra text) that has exactly two keys:
"stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent),
"feedback": a concise string message.
The JSON must start with '{{' and end with '}}'.
Do not output anything else.
Question: "{question}"
Solution: "{code}"
Your response:"""
    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # Allow enough tokens for a complete response
        temperature=0.2,  # Small randomness for creativity
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,  # Enable sampling to encourage generation
    )

    # For a causal LM the generated sequence starts with the prompt tokens.
    # Decode only the continuation so that JSON-looking text inside the
    # question or the submitted code can never be mistaken for the verdict.
    prompt_length = inputs["input_ids"].shape[1]
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Debug output goes to stderr so stdout stays clean for callers that
    # print or parse the JSON result (e.g. the command-line entry point).
    print("Raw model response:", response_text, file=sys.stderr)

    result = _extract_evaluation(response_text)
    if result is None:
        result = {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}
    return result
# Command-line entry point for quick manual testing.
if __name__ == "__main__":
    import sys

    # Two positional arguments are required: the question, then the code.
    if len(sys.argv) >= 3:
        question, code = sys.argv[1], sys.argv[2]
        result = evaluate_code(question, code)
        print(json.dumps(result))
    else:
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)
|