Spaces:

DEADLOCK007X
/

CODEXspace

Sleeping

App Files Community

DEADLOCK007X committed on Mar 15

Commit

ec5407e

1 Parent(s): 37475c4

Improve JSON extraction with fallback methods

Browse files

Files changed (1) hide show

tinyllama_inference.py +34 -35

tinyllama_inference.py CHANGED Viewed

@@ -14,58 +14,57 @@ def load_model():
         model = AutoModelForCausalLM.from_pretrained(model_name)
     return tokenizer, model
-def extract_json(response_text):
-    # First attempt: Use regex (non-greedy, with DOTALL) to find JSON blocks
-    matches = re.findall(r'\{.*?\}', response_text, re.DOTALL)
-    # Check the matches in reverse order (last one might be the evaluation output)
-    for m in reversed(matches):
-        try:
-            temp = json.loads(m)
-            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
-                return temp
-        except Exception:
-            continue
-    # Fallback: try extracting JSON from each line that looks like a JSON object
-    json_lines = [line.strip() for line in response_text.splitlines() if line.strip().startswith('{') and line.strip().endswith('}')]
-    for line in reversed(json_lines):
-        try:
-            temp = json.loads(line)
-            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
-                return temp
-        except Exception:
-            continue
-    return {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}
 def evaluate_code(question, code):
     prompt = f"""You are an expert code evaluator.
-For the following problem and solution, provide exactly one JSON object that contains your evaluation.
-The JSON object must have exactly two keys:
-  "stars": an integer between 0 and 5, where 5 means a perfect solution and 0 means completely incorrect.
-  "feedback": a concise explanation of your rating.
-Do not include any extra text, examples, or multiple responses.
-Only evaluate the code provided below.
 Question: "{question}"
 Solution: "{code}"
 Your response:"""
-    # ... rest of the code remains unchanged ...
     tokenizer, model = load_model()
     inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(
         **inputs,
-        max_new_tokens=120,      # Increase token allowance for a complete response
-        temperature=0.2,         # Low randomness for deterministic output
         pad_token_id=tokenizer.eos_token_id,
         do_sample=True
     )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Raw model response:", response_text)  # Debug: Inspect raw output
-    result = extract_json(response_text)
     return result
-# For direct command-line testing.
 if __name__ == "__main__":
     import sys
     if len(sys.argv) < 3:

         model = AutoModelForCausalLM.from_pretrained(model_name)
     return tokenizer, model
 def evaluate_code(question, code):
+    # Revised prompt with explicit instructions about arithmetic correctness.
     prompt = f"""You are an expert code evaluator.
+Evaluate the following solution for the given problem.
+The problem asks for a function that returns the square of a number.
+A correct solution must multiply the number by itself (using x*x or x**2).
+If the solution uses any other operation (such as addition), it is completely incorrect.
+Rate the solution as follows:
+  - 5 stars: Perfect solution; the solution is correct, efficient, and follows best practices.
+  - 4 stars: Correct solution with minor issues.
+  - 3 stars: Partially correct solution with noticeable issues.
+  - 2 stars: Incorrect solution with some correct elements.
+  - 1 star: Mostly incorrect solution.
+  - 0 stars: Completely incorrect solution.
+Respond with exactly one JSON object (with no extra text) that has exactly two keys:
+  "stars": an integer between 0 and 5,
+  "feedback": a concise string message explaining your rating.
+The JSON must start with '{{' and end with '}}'.
+Do not output any additional text.
 Question: "{question}"
 Solution: "{code}"
 Your response:"""
     tokenizer, model = load_model()
     inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(
         **inputs,
+        max_new_tokens=120,
+        temperature=0.2,
         pad_token_id=tokenizer.eos_token_id,
         do_sample=True
     )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    print("Raw model response:", response_text)  # Debug output
+    # Extract JSON: use regex to capture all JSON blocks and select one with expected keys
+    matches = re.findall(r'\{.*?\}', response_text, re.DOTALL)
+    result = None
+    for m in reversed(matches):
+        try:
+            temp = json.loads(m)
+            if isinstance(temp, dict) and "stars" in temp and "feedback" in temp:
+                result = temp
+                break
+        except Exception:
+            continue
+    if result is None:
+        result = {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}
     return result
 if __name__ == "__main__":
     import sys
     if len(sys.argv) < 3: