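"""Evaluate a code solution against a problem statement with a local LLM.

Loads deepseek-ai/deepseek-coder-1.3b-instruct via Hugging Face Transformers,
prompts it to rate the solution from 0 to 5 stars, and parses the model's
response into a JSON object of the form {"stars": int, "feedback": str}.
"""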
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
# Global variables for caching the model and tokenizer
tokenizer, model = None, None

def load_model():
    global tokenizer, model
    if tokenizer is None or model is None:
        # Use the DeepSeek instruct model for code evaluation.
        model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model
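
# Note (an optional variant, not part of the original setup): on a GPU host the
# same model could be loaded in half precision to reduce memory use, e.g.
#   import torch
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, torch_dtype=torch.float16, device_map="auto")
# (device_map="auto" additionally requires the `accelerate` package).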

def extract_json(response_text):
    # First attempt: use regex (non-greedy, with DOTALL) to find JSON blocks.
    matches = re.findall(r'\{.*?\}', response_text, re.DOTALL)
    # Check the matches in reverse order (the last one is likely the evaluation output).
    for m in reversed(matches):
        try:
            temp = json.loads(m)
            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
                return temp
        except Exception:
            continue
    # Fallback: try extracting JSON from each line that looks like a JSON object.
    json_lines = [
        line.strip()
        for line in response_text.splitlines()
        if line.strip().startswith('{') and line.strip().endswith('}')
    ]
    for line in reversed(json_lines):
        try:
            temp = json.loads(line)
            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
                return temp
        except Exception:
            continue
    return {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}

def evaluate_code(question, code):
    prompt = f"""You are an expert code evaluator.
Evaluate the following solution for the given problem.
Rate the solution as follows:
- 5 stars: Perfect solution; it is correct, efficient, and follows best practices.
- 4 stars: Correct solution with minor issues or improvements possible.
- 3 stars: Partially correct solution with noticeable issues.
- 2 stars: Incorrect solution with some correct elements.
- 1 star: Mostly incorrect solution.
- 0 stars: Completely incorrect solution.
Respond with exactly one JSON object (with no extra text) that has exactly two keys:
"stars": an integer between 0 and 5,
"feedback": a concise string message explaining your rating.
The JSON must start with '{{' and end with '}}'.
Do not output any additional text.
Question: "{question}"
Solution: "{code}"
Your response:"""
    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,   # Allow enough tokens for a complete JSON response.
        temperature=0.2,      # Low randomness for near-deterministic output.
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True
    )
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Raw model response:", response_text)  # Debug: inspect raw output.
    result = extract_json(response_text)
    return result
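
# Note: outputs[0] also contains the prompt tokens, which is why extract_json
# scans for the *last* valid JSON object. An alternative (a sketch, not the
# behavior above) is to decode only the newly generated tokens:
#   new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
#   response_text = tokenizer.decode(new_tokens, skip_special_tokens=True)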

# For direct command-line testing.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 3:
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)
    question = sys.argv[1]
    code = sys.argv[2]
    result = evaluate_code(question, code)
    print(json.dumps(result))
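
# Example invocation (the filename is hypothetical):
#   python evaluate_code.py "Reverse a string in Python" "def rev(s): return s[::-1]"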