Spaces:
Sleeping
Sleeping
import json | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Lazily populated (tokenizer, model) pair; filled on the first load_model()
# call so later calls in the same process reuse the already-loaded checkpoint.
_LOADED_MODEL = None


def load_model():
    """Return the tokenizer and causal language model used for evaluation.

    The checkpoint is loaded on the first call only and the same
    ``(tokenizer, model)`` pair is returned on every subsequent call, so
    repeated evaluations do not pay the loading cost again.

    Returns:
        A ``(tokenizer, model)`` tuple for ``Salesforce/codegen-350M-mono``.
    """
    global _LOADED_MODEL
    if _LOADED_MODEL is None:
        # Use a public model for code evaluation.
        model_name = "Salesforce/codegen-350M-mono"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _LOADED_MODEL = (tokenizer, model)
    return _LOADED_MODEL
def _extract_json_object(text):
    """Return the first valid JSON object embedded in *text*, or ``None``."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and
            # ignores whatever text follows it.
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a valid object at this brace; try the next candidate.
            start = text.find("{", start + 1)
        else:
            return parsed
    return None


def evaluate_code(question, code):
    """Ask the language model to grade a solution and return its verdict.

    Args:
        question: Text of the problem statement.
        code: The user's solution to evaluate.

    Returns:
        The JSON object parsed from the model's reply (the prompt requests
        the keys ``"stars"`` and ``"feedback"``). If no JSON object can be
        found in the reply, a fallback dict with ``"stars": 0`` and an
        explanatory ``"feedback"`` message is returned instead.
    """
    # Construct a prompt for the AI evaluator.
    prompt = f"""
You are an expert code evaluator.
Rate the user's solution to the following problem from 0-5 (0 = completely incorrect, 5 = excellent).
Also provide a concise "feedback" message.
Problem: "{question}"
Solution: "{code}"
Return ONLY valid JSON: {{"stars": number, "feedback": string}}
Do not include any extra text outside the JSON.
"""
    # Load the model and tokenizer.
    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=150)

    # For a causal LM the generated sequence starts with the prompt tokens;
    # decode only the continuation so the prompt text is not fed to the
    # JSON parser.
    prompt_length = inputs["input_ids"].shape[1]
    response_text = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    result = _extract_json_object(response_text)
    if result is None:
        return {"stars": 0, "feedback": "Evaluation failed. Unable to parse AI response."}
    return result
# Command-line entry point: `python <script> "<question>" "<code>"`.
# Prints the evaluation (or a usage error) as a single JSON document.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        usage_error = {"error": "Please provide a question and code as arguments"}
        print(json.dumps(usage_error))
        sys.exit(1)

    # Only the first two positional arguments are used; extras are ignored.
    question, code = cli_args[0], cli_args[1]
    result = evaluate_code(question, code)
    print(json.dumps(result))