Spaces:

DEADLOCK007X
/

CODEXspace

Sleeping

File size: 2,479 Bytes

e65d0ad
8206a45
 
e65d0ad
8206a45
 
3d71f2e
8206a45
 
 
4441c50
 
8206a45
 
 
e65d0ad
8206a45
6fadedd
8206a45
 
 
 
 
 
 
 
 
 
 
 
6fadedd
8206a45
 
4441c50
8206a45
 
 
0bae633
8206a45
4441c50
8aa99b8
4441c50
8aa99b8
8206a45
 
 
 
 
 
 
 
 
 
e65d0ad
8206a45
e65d0ad
8206a45

import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

# Module-level cache slots. Both start as None and are filled in by
# load_model() the first time it is called.
tokenizer = None
model = None

def load_model():
    """Return the cached ``(tokenizer, model)`` pair, loading it on first use.

    The pair is stored in the module-level ``tokenizer`` and ``model``
    globals. If either one is still ``None``, both are (re)loaded from the
    DeepSeek Coder instruct checkpoint before being returned.
    """
    global tokenizer, model

    # Fast path: everything is already cached.
    if tokenizer is not None and model is not None:
        return tokenizer, model

    # DeepSeek instruct checkpoint used for code evaluation.
    checkpoint = "deepseek-ai/deepseek-coder-1.3b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return tokenizer, model

def _extract_json_object(text):
    """Return the first JSON object embedded in *text* as a dict, or None.

    Every ``{`` in *text* is tried as a possible start of a JSON document.
    Using the real JSON decoder (instead of a regex) means multi-line
    objects, nested braces, and braces inside string values are all handled.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass  # Not valid JSON from this brace; try the next one.
        else:
            if isinstance(candidate, dict):
                return candidate
        start = text.find("{", start + 1)
    return None


def _normalize_evaluation(parsed):
    """Coerce a parsed model reply to the documented stars/feedback shape.

    ``stars`` becomes an int clamped to 0..5 (0 when missing or not
    convertible) and ``feedback`` becomes a string (empty when missing).
    Any other keys the model produced are passed through unchanged.
    """
    result = dict(parsed)
    try:
        stars = int(result.get("stars", 0))
    except (TypeError, ValueError, OverflowError):
        stars = 0
    result["stars"] = max(0, min(5, stars))

    feedback = result.get("feedback")
    if feedback is None:
        feedback = ""
    elif not isinstance(feedback, str):
        feedback = str(feedback)
    result["feedback"] = feedback
    return result


def evaluate_code(question, code):
    """Ask the language model to grade *code* as a solution to *question*.

    Args:
        question: Problem statement, interpolated into the prompt.
        code: Candidate solution, interpolated into the prompt.

    Returns:
        A dict with at least the keys ``"stars"`` (int, 0..5) and
        ``"feedback"`` (str). When no JSON object can be recovered from the
        model output, ``stars`` is 0 and ``feedback`` explains the failure.
    """
    import sys  # Local import: only needed for the stderr debug print below.

    # Prompt instructs the model to output exactly one valid JSON object.
    prompt = f"""You are an expert code evaluator.
Evaluate the following solution for the given problem.
Respond with exactly one JSON object (with no extra text) that has exactly two keys:
  "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent),
  "feedback": a concise string message.
The JSON must start with '{{' and end with '}}'.
Do not output anything else.
Problem: "{question}"
Solution: "{code}"
"""
    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    # Greedy decoding (do_sample=False) is already deterministic, so no
    # temperature is passed: it would be ignored and only trigger warnings.
    outputs = model.generate(
        **inputs,
        max_new_tokens=60,     # Limit output length for faster responses
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
    )
    # generate() returns the prompt tokens followed by the completion. Decode
    # only the completion; otherwise the literal "'{' and end with '}'" text
    # in the prompt would be picked up as the first "JSON object".
    prompt_length = inputs["input_ids"].shape[-1]
    response_text = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    # Debug output goes to stderr so stdout stays clean for JSON consumers.
    print("Raw model response:", response_text, file=sys.stderr)

    parsed = _extract_json_object(response_text)
    if parsed is not None:
        return _normalize_evaluation(parsed)
    if "{" in response_text:
        return {"stars": 0, "feedback": "Evaluation failed. Unable to parse AI response."}
    return {"stars": 0, "feedback": "Evaluation failed. Unable to extract JSON from AI response."}

# Command-line entry point for quick manual checks: the first argument is the
# question and the second is the code to evaluate.
if __name__ == "__main__":
    import sys

    # argv[0] is the script path, so two user arguments mean len(argv) >= 3.
    if len(sys.argv) >= 3:
        question, code = sys.argv[1], sys.argv[2]
        result = evaluate_code(question, code)
        print(json.dumps(result))
    else:
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)