# Spaces: Running  (appears to be page-header residue from the source listing, not program code)
import functools
import json
import re

from transformers import AutoTokenizer, AutoModelForCausalLM
@functools.lru_cache(maxsize=1)
def load_model(model_name="Salesforce/codegen-350M-mono"):
    """Load the tokenizer and causal language model used for code evaluation.

    The loaded pair is memoized, so repeated calls with the same
    ``model_name`` reuse the objects created by the first call instead of
    instantiating the tokenizer and model again.

    Args:
        model_name: Model identifier passed to ``from_pretrained``. Defaults
            to the public ``Salesforce/codegen-350M-mono`` checkpoint.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model
def _extract_json_object(text):
    """Return the first JSON object embedded in *text*, or None if none parses."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and ignores
            # whatever trails it, so multi-line objects and extra text are fine.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


def _normalize_result(parsed):
    """Coerce a parsed answer to the stars/feedback shape; None if stars is unusable."""
    stars = parsed.get("stars")
    if isinstance(stars, bool):
        # bool is an int subclass; true/false is not a rating.
        return None
    try:
        stars = int(stars)
    except (TypeError, ValueError, OverflowError):
        return None
    feedback = parsed.get("feedback")
    feedback = "" if feedback is None else str(feedback)
    # Clamp to the 0-5 range the prompt asks for.
    return {"stars": max(0, min(5, stars)), "feedback": feedback}


def evaluate_code(question, code):
    """Ask the language model to rate a solution and return its verdict.

    Args:
        question: Text of the problem statement.
        code: The user's solution to evaluate.

    Returns:
        A dict with two keys: ``"stars"`` (an int from 0 to 5) and
        ``"feedback"`` (a str). When no usable JSON object can be recovered
        from the model output, ``"stars"`` is 0 and ``"feedback"`` carries an
        "Evaluation failed." message.
    """
    # Prompt that asks for JSON-only output.
    prompt = (
        "You are an expert code evaluator.\n"
        "Evaluate the user's solution to the following problem.\n"
        "Return ONLY a JSON object with two keys:\n"
        '- "stars": an integer between 0 and 5 '
        "(0 means completely incorrect, 5 means excellent).\n"
        '- "feedback": a concise message.\n'
        "Do not include any additional text.\n"
        f'Problem: "{question}"\n'
        f'Solution: "{code}"\n'
    )

    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        # Greedy decoding is what makes the output deterministic; a
        # temperature only has an effect when sampling is enabled.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Decode only the
    # continuation so braces in the question or solution cannot be mistaken
    # for the model's JSON answer.
    prompt_length = inputs["input_ids"].shape[1]
    response_text = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    if "{" not in response_text:
        return {
            "stars": 0,
            "feedback": "Evaluation failed. Unable to extract JSON from AI response.",
        }

    parsed = _extract_json_object(response_text)
    result = _normalize_result(parsed) if parsed is not None else None
    if result is None:
        return {
            "stars": 0,
            "feedback": "Evaluation failed. Unable to parse AI response.",
        }
    return result
# Command-line entry point: evaluate one question/solution pair and print the
# verdict as a JSON document on stdout.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        # Usage errors are reported as JSON too, with a non-zero exit status.
        usage_error = {"error": "Please provide a question and code as arguments"}
        print(json.dumps(usage_error))
        sys.exit(1)

    # Only the first two arguments are used; any extras are ignored.
    question, code = cli_args[0], cli_args[1]
    print(json.dumps(evaluate_code(question, code)))