import json
import re

from transformers import AutoTokenizer, AutoModelForCausalLM


def load_model():
    # Use a small public code model for evaluation.
    model_name = "Salesforce/codegen-350M-mono"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model


def evaluate_code(question, code):
    # Prompt refined to ask for JSON-only output.
    prompt = f"""You are an expert code evaluator. Evaluate the user's solution to the following problem.
Return ONLY a JSON object with two keys:
- "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent).
- "feedback": a concise message.
Do not include any additional text.

Problem: "{question}"

Solution: "{code}"
"""

    # Load the model and tokenizer.
    tokenizer, model = load_model()

    # Generate a short response. do_sample=True is required for temperature to take
    # effect; a low temperature keeps the output close to deterministic.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.2,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens so the prompt itself is not searched for JSON.
    generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    response_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Attempt to extract a JSON object from the response (re.DOTALL lets it span lines).
    match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if match:
        try:
            result = json.loads(match.group(0))
        except json.JSONDecodeError:
            result = {"stars": 0, "feedback": "Evaluation failed. Unable to parse AI response."}
    else:
        result = {"stars": 0, "feedback": "Evaluation failed. Unable to extract JSON from AI response."}

    return result


# For direct testing from the command line.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)

    question = sys.argv[1]
    code = sys.argv[2]
    result = evaluate_code(question, code)
    print(json.dumps(result))
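
# Example command-line invocation (illustrative; the filename "evaluate.py" is an
# assumption, and the small CodeGen model is not guaranteed to emit valid JSON, in
# which case the fallback {"stars": 0, ...} result is printed):
#
#   python evaluate.py "Reverse a string" "def rev(s): return s[::-1]"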