neovalle committed on
Commit f79fa2a · verified · 1 Parent(s): aca391b

Update app.py

Files changed (1)
  1. app.py +17 -24
app.py CHANGED
@@ -7,14 +7,13 @@ import os
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
-# MODEL_NAME can be "google/flan-t5-large", or switch to "google/flan-t5-base" if you need faster/less memory usage.
 MODEL_NAME = "google/flan-t5-large"
 
-# Load the tokenizer and model.
-# If you're on a GPU Space, device=0 places the model on GPU.
-# If CPU only, leave device=-1.
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16 if torch.cuda.is_available() else None)
+model = AutoModelForSeq2SeqLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else None
+)
 if torch.cuda.is_available():
     model = model.to("cuda")
 
@@ -43,22 +42,19 @@ Answer: {answer}
 Score:
 """
     result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]
-    # Extract the first integer found in the output
     match = re.search(r"\d+", result)
     if match:
         return int(match.group(0))
-    # If no number is found, return a default (e.g. 5)
     return 5
 
 def judge_ecolinguistics_from_csv(csv_file):
     """
     1. Parse the CSV for each question–answer pair.
     2. Use the model to score each pair.
-    3. Save a new CSV with (question_number, score) per row plus a total row.
-    4. Return the path to the new CSV for download.
+    3. Save a new CSV with (question_number, score) per row + a final Total row.
+    4. Return a path to the new CSV for download.
     """
     # Read rows from the uploaded CSV
-    # Adjust these column names to match your actual CSV headers
     rows = []
     with open(csv_file.name, "r", encoding="utf-8") as f:
         reader = csv.DictReader(f)
@@ -69,12 +65,10 @@ def judge_ecolinguistics_from_csv(csv_file):
     total_score = 0
 
     for r in rows:
-        # Extract needed columns; adapt to match your CSV
         question_number = r.get("question_number", "")
         question = r.get("question", "")
         answer = r.get("answer", "")
 
-        # Score each Q&A with the model
         sc = score_qa(question, answer)
         total_score += sc
 
@@ -83,20 +77,19 @@ def judge_ecolinguistics_from_csv(csv_file):
             "score": sc
         })
 
-    # Write results to a new CSV
-    # We'll place it in a temporary file so Gradio can return it
-    out_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
-    fieldnames = ["question_number", "score"]
-    writer = csv.DictWriter(out_file, fieldnames=fieldnames)
-    writer.writeheader()
-    for row in results:
-        writer.writerow(row)
-    # Add a final row showing the total
-    writer.writerow({"question_number": "Total", "score": total_score})
-    out_file.close()
+    # Write results to a new CSV in text mode
+    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv", encoding="utf-8") as out_file:
+        fieldnames = ["question_number", "score"]
+        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
+        writer.writeheader()
+        for row in results:
+            writer.writerow(row)
+        # Add a final row showing the total
+        writer.writerow({"question_number": "Total", "score": total_score})
+        out_path = out_file.name
 
     # Return the path to the CSV so Gradio can serve it as a downloadable file
-    return out_file.name
+    return out_path
 
 demo = gr.Interface(
     fn=judge_ecolinguistics_from_csv,
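
For reference, the updated judge_ecolinguistics_from_csv expects an uploaded CSV with question_number, question and answer columns, and returns a CSV of (question_number, score) rows followed by a Total row. The sketch below is a minimal, hypothetical example of building an input file in that shape; the file name and the question-answer content are illustrative and not part of this commit.

import csv

# Hypothetical input for the Space: one row per question-answer pair,
# using the exact headers app.py reads with r.get(...).
rows = [
    {"question_number": "1",
     "question": "What does ecolinguistics study?",
     "answer": "How language shapes, and is shaped by, our relationship with the environment."},
    {"question_number": "2",
     "question": "Give one example of erasure in environmental discourse.",
     "answer": "Referring to living forests purely as 'timber resources'."},
]

with open("sample_input.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["question_number", "question", "answer"])
    writer.writeheader()
    writer.writerows(rows)

# Uploading sample_input.csv to the Space should return a CSV with one
# (question_number, score) row per pair plus a final "Total" row.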