DEADLOCK007X committed on
Commit
3d71f2e
·
1 Parent(s): 0bae633

Update tinyllama_inference.py with improved evaluation and performance

Browse files
Files changed (1) hide show
  1. tinyllama_inference.py +18 -17
tinyllama_inference.py CHANGED
@@ -2,37 +2,39 @@ import json
2
  import re
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
 
 
 
5
  def load_model():
6
- # Using a public model for code evaluation.
7
- model_name = "Salesforce/codegen-350M-mono"
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForCausalLM.from_pretrained(model_name)
 
10
  return tokenizer, model
11
 
12
  def evaluate_code(question, code):
13
- # Refined prompt to enforce JSON-only output.
14
  prompt = f"""You are an expert code evaluator.
15
- Evaluate the user's solution to the following problem.
16
- Return ONLY a JSON object with two keys:
17
- - "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent).
18
- - "feedback": a concise message.
19
- Do not include any additional text.
20
  Problem: "{question}"
21
  Solution: "{code}"
22
  """
23
- # Load model and tokenizer.
24
  tokenizer, model = load_model()
25
- # Generate a response with reduced max tokens and a lower temperature for determinism.
26
  inputs = tokenizer(prompt, return_tensors="pt")
27
  outputs = model.generate(
28
  **inputs,
29
- max_new_tokens=100,
30
- temperature=0.2,
31
- pad_token_id=tokenizer.eos_token_id
 
32
  )
33
  response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
34
 
35
- # Attempt to extract the JSON object from the response.
36
  match = re.search(r'\{.*\}', response_text)
37
  if match:
38
  json_text = match.group(0)
@@ -45,7 +47,6 @@ Solution: "{code}"
45
 
46
  return result
47
 
48
- # For direct testing from the command line.
49
  if __name__ == "__main__":
50
  import sys
51
  if len(sys.argv) < 3:
 
2
  import re
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
+ # Global variables to cache the model and tokenizer
6
+ tokenizer, model = None, None
7
+
8
  def load_model():
9
+ global tokenizer, model
10
+ if tokenizer is None or model is None:
11
+ model_name = "Salesforce/codegen-350M-mono"
12
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
13
+ model = AutoModelForCausalLM.from_pretrained(model_name)
14
  return tokenizer, model
15
 
16
  def evaluate_code(question, code):
17
+ # Refined prompt instructing the model to output exactly valid JSON.
18
  prompt = f"""You are an expert code evaluator.
19
+ Evaluate the following solution for the given problem.
20
+ Return ONLY a JSON object with exactly two keys:
21
+ "stars": an integer between 0 and 5 (0 means completely incorrect, 5 means excellent).
22
+ "feedback": a concise message.
23
+ Output must be exactly valid JSON and nothing else.
24
  Problem: "{question}"
25
  Solution: "{code}"
26
  """
 
27
  tokenizer, model = load_model()
 
28
  inputs = tokenizer(prompt, return_tensors="pt")
29
  outputs = model.generate(
30
  **inputs,
31
+ max_new_tokens=50,
32
+ temperature=0.0,
33
+ pad_token_id=tokenizer.eos_token_id,
34
+ do_sample=False
35
  )
36
  response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
37
 
 
38
  match = re.search(r'\{.*\}', response_text)
39
  if match:
40
  json_text = match.group(0)
 
47
 
48
  return result
49
 
 
50
  if __name__ == "__main__":
51
  import sys
52
  if len(sys.argv) < 3: