Spaces:

DEADLOCK007X
/

CODEXspace

Sleeping

App Files Community

DEADLOCK007X committed on Mar 15

Commit

3d71f2e

1 Parent(s): 0bae633

Update tinyllama_inference.py with improved evaluation and performance

Browse files

Files changed (1) hide show

tinyllama_inference.py +18 -17

tinyllama_inference.py CHANGED Viewed

@@ -2,37 +2,39 @@ import json
 import re
 from transformers import AutoTokenizer, AutoModelForCausalLM
 def load_model():
-    # Using a public model for code evaluation.
-    model_name = "Salesforce/codegen-350M-mono"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name)
     return tokenizer, model
 def evaluate_code(question, code):
-    # Refined prompt to enforce JSON-only output.
     prompt = f"""You are an expert code evaluator.
-Evaluate the user's solution to the following problem.
-Return ONLY a JSON object with two keys:
-- "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent).
-- "feedback": a concise message.
-Do not include any additional text.
 Problem: "{question}"
 Solution: "{code}"
 """
-    # Load model and tokenizer.
     tokenizer, model = load_model()
-    # Generate a response with reduced max tokens and a lower temperature for determinism.
     inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(
         **inputs,
-        max_new_tokens=100,
-        temperature=0.2,
-        pad_token_id=tokenizer.eos_token_id
     )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Attempt to extract the JSON object from the response.
     match = re.search(r'\{.*\}', response_text)
     if match:
         json_text = match.group(0)
@@ -45,7 +47,6 @@ Solution: "{code}"
     return result
-# For direct testing from the command line.
 if __name__ == "__main__":
     import sys
     if len(sys.argv) < 3:

 import re
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# Global variables to cache the model and tokenizer
+tokenizer, model = None, None
 def load_model():
+    global tokenizer, model
+    if tokenizer is None or model is None:
+        model_name = "Salesforce/codegen-350M-mono"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name)
     return tokenizer, model
 def evaluate_code(question, code):
+    # Refined prompt instructing the model to output exactly valid JSON.
     prompt = f"""You are an expert code evaluator.
+Evaluate the following solution for the given problem.
+Return ONLY a JSON object with exactly two keys:
+  "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent).
+  "feedback": a concise message.
+Output must be exactly valid JSON and nothing else.
 Problem: "{question}"
 Solution: "{code}"
 """
     tokenizer, model = load_model()
     inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(
         **inputs,
+        max_new_tokens=50,
+        temperature=0.0,
+        pad_token_id=tokenizer.eos_token_id,
+        do_sample=False
     )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     match = re.search(r'\{.*\}', response_text)
     if match:
         json_text = match.group(0)
     return result
 if __name__ == "__main__":
     import sys
     if len(sys.argv) < 3: