import json
import re

from transformers import AutoTokenizer, AutoModelForCausalLM

# Global variables for caching the model and tokenizer
tokenizer, model = None, None


def load_model():
    """Load the evaluator model and tokenizer once, then reuse the cached instances."""
    global tokenizer, model
    if tokenizer is None or model is None:
        # Use the DeepSeek instruct model for code evaluation.
        model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model


def extract_json(response_text):
    """Pull the evaluation JSON object out of the raw model output."""
    # First attempt: use a non-greedy regex (with DOTALL) to find JSON-like blocks.
    matches = re.findall(r'\{.*?\}', response_text, re.DOTALL)
    # Check the matches in reverse order: the last JSON object is most likely the
    # evaluation output rather than something echoed from the prompt.
    for m in reversed(matches):
        try:
            temp = json.loads(m)
            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
                return temp
        except Exception:
            continue
    # Fallback: try parsing each line that looks like a complete JSON object.
    json_lines = [line.strip() for line in response_text.splitlines()
                  if line.strip().startswith('{') and line.strip().endswith('}')]
    for line in reversed(json_lines):
        try:
            temp = json.loads(line)
            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
                return temp
        except Exception:
            continue
    return {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}


def evaluate_code(question, code):
    """Ask the model to rate a solution to a question and return the parsed result."""
    prompt = f"""You are an expert code evaluator. For the following problem and solution, provide exactly one JSON object that contains your evaluation.
The JSON object must have exactly two keys:
"stars": an integer between 0 and 5, where 5 means a perfect solution and 0 means completely incorrect.
"feedback": a concise explanation of your rating.
Do not include any extra text, examples, or multiple responses. Only evaluate the code provided below.
Question: "{question}"
Solution: "{code}"
Your response:"""
    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,                   # Allow enough tokens for a complete response
        temperature=0.2,                      # Low temperature for near-deterministic output
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True
    )
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Raw model response:", response_text)  # Debug: inspect raw output
    result = extract_json(response_text)
    return result


# For direct command-line testing.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 3:
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)
    question = sys.argv[1]
    code = sys.argv[2]
    result = evaluate_code(question, code)
    print(json.dumps(result))