Commit 35d31e1
Parent(s): b72b033
Refine evaluation prompt and extraction for DeepSeek instruct model
tinyllama_inference.py CHANGED (+16 -9)
@@ -15,13 +15,21 @@ def load_model():
     return tokenizer, model
 
 def evaluate_code(question, code):
+    # Refined prompt with explicit rating criteria.
     prompt = f"""You are an expert code evaluator.
 Evaluate the following solution for the given problem.
+Rate the solution as follows:
+- 5 stars: Perfect solution; it is correct, efficient, and follows best practices.
+- 4 stars: Correct solution with minor issues or improvements possible.
+- 3 stars: Partially correct solution with noticeable issues.
+- 2 stars: Incorrect solution with some correct elements.
+- 1 star: Mostly incorrect solution.
+- 0 stars: Completely incorrect solution.
 Respond with exactly one JSON object (with no extra text) that has exactly two keys:
-"stars": an integer between 0 and 5
-"feedback": a concise string message.
+"stars": an integer between 0 and 5,
+"feedback": a concise string message explaining your rating.
 The JSON must start with '{{' and end with '}}'.
-Do not output
+Do not output any additional text.
 Question: "{question}"
 Solution: "{code}"
 Your response:"""
@@ -30,21 +38,20 @@ Your response:"""
     inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(
         **inputs,
-        max_new_tokens=
-        temperature=0.
+        max_new_tokens=120,  # Increase token allowance for a complete evaluation
+        temperature=0.1,  # A low temperature for more deterministic output
         pad_token_id=tokenizer.eos_token_id,
-        do_sample=True  # Enable sampling to
+        do_sample=True  # Enable sampling to allow some creativity
     )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Raw model response:", response_text)  # Debug output
+    print("Raw model response:", response_text)  # Debug: inspect raw output
 
-    #
+    # Extract all JSON objects using non-greedy regex and select the one with expected keys
     matches = re.findall(r'\{.*?\}', response_text)
     result = None
    for m in matches:
        try:
            temp = json.loads(m)
-            # Check that the parsed JSON contains both expected keys
             if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
                 result = temp
                 break
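For context, the extraction step this commit tightens can be exercised on its own. The sketch below mirrors the regex-plus-json.loads loop from the diff; the helper name extract_evaluation, the explicit JSONDecodeError handling, and the fallback dictionary are illustrative assumptions added here and are not part of the committed file.

# Standalone sketch of the JSON-extraction step from this commit (assumptions noted above).
import json
import re

def extract_evaluation(response_text):
    # Same non-greedy pattern as the diff: it captures flat, single-line JSON
    # objects and will not match nested braces or objects split across lines.
    matches = re.findall(r'\{.*?\}', response_text)
    for m in matches:
        try:
            candidate = json.loads(m)
        except json.JSONDecodeError:
            continue  # skip brace-delimited fragments that are not valid JSON
        # Keep only objects with the two keys the refined prompt asks for.
        if isinstance(candidate, dict) and "stars" in candidate and "feedback" in candidate:
            return candidate
    # Assumed fallback when no usable object is found in the model output.
    return {"stars": 0, "feedback": "Could not parse a valid evaluation from the model output."}

if __name__ == "__main__":
    raw = 'Your response: {"stars": 4, "feedback": "Correct, but naming could be clearer."} extra text'
    print(extract_evaluation(raw))

The key check matters because, for a causal LM, model.generate returns the prompt tokens followed by the new tokens, so the decoded text still contains the prompt itself, including the literal braces produced by the '{{' and '}}' instruction; the first brace-delimited match is therefore not necessarily the evaluation object.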