Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -7,14 +7,13 @@ import os
|
|
7 |
import torch
|
8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
9 |
|
10 |
-
# MODEL_NAME can be "google/flan-t5-large", or switch to "google/flan-t5-base" if you need faster/less memory usage.
|
11 |
MODEL_NAME = "google/flan-t5-large"
|
12 |
|
13 |
-
# Load the tokenizer and model.
|
14 |
-
# If you're on a GPU Space, device=0 places the model on GPU.
|
15 |
-
# If CPU only, leave device=-1.
|
16 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
17 |
-
model = AutoModelForSeq2SeqLM.from_pretrained(
|
|
|
|
|
|
|
18 |
if torch.cuda.is_available():
|
19 |
model = model.to("cuda")
|
20 |
|
@@ -43,22 +42,19 @@ Answer: {answer}
|
|
43 |
Score:
|
44 |
"""
|
45 |
result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]
|
46 |
-
# Extract the first integer found in the output
|
47 |
match = re.search(r"\d+", result)
|
48 |
if match:
|
49 |
return int(match.group(0))
|
50 |
-
# If no number is found, return a default (e.g. 5)
|
51 |
return 5
|
52 |
|
53 |
def judge_ecolinguistics_from_csv(csv_file):
|
54 |
"""
|
55 |
1. Parse the CSV for each question–answer pair.
|
56 |
2. Use the model to score each pair.
|
57 |
-
3. Save a new CSV with (question_number, score) per row
|
58 |
-
4. Return
|
59 |
"""
|
60 |
# Read rows from the uploaded CSV
|
61 |
-
# Adjust these column names to match your actual CSV headers
|
62 |
rows = []
|
63 |
with open(csv_file.name, "r", encoding="utf-8") as f:
|
64 |
reader = csv.DictReader(f)
|
@@ -69,12 +65,10 @@ def judge_ecolinguistics_from_csv(csv_file):
|
|
69 |
total_score = 0
|
70 |
|
71 |
for r in rows:
|
72 |
-
# Extract needed columns; adapt to match your CSV
|
73 |
question_number = r.get("question_number", "")
|
74 |
question = r.get("question", "")
|
75 |
answer = r.get("answer", "")
|
76 |
|
77 |
-
# Score each Q&A with the model
|
78 |
sc = score_qa(question, answer)
|
79 |
total_score += sc
|
80 |
|
@@ -83,20 +77,19 @@ def judge_ecolinguistics_from_csv(csv_file):
|
|
83 |
"score": sc
|
84 |
})
|
85 |
|
86 |
-
# Write results to a new CSV
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
out_file.close()
|
97 |
|
98 |
# Return the path to the CSV so Gradio can serve it as a downloadable file
|
99 |
-
return
|
100 |
|
101 |
demo = gr.Interface(
|
102 |
fn=judge_ecolinguistics_from_csv,
|
|
|
7 |
import torch
|
8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
9 |
|
|
|
10 |
MODEL_NAME = "google/flan-t5-large"
|
11 |
|
|
|
|
|
|
|
12 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
13 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(
|
14 |
+
MODEL_NAME,
|
15 |
+
torch_dtype=torch.float16 if torch.cuda.is_available() else None
|
16 |
+
)
|
17 |
if torch.cuda.is_available():
|
18 |
model = model.to("cuda")
|
19 |
|
|
|
42 |
Score:
|
43 |
"""
|
44 |
result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]
|
|
|
45 |
match = re.search(r"\d+", result)
|
46 |
if match:
|
47 |
return int(match.group(0))
|
|
|
48 |
return 5
|
49 |
|
50 |
def judge_ecolinguistics_from_csv(csv_file):
|
51 |
"""
|
52 |
1. Parse the CSV for each question–answer pair.
|
53 |
2. Use the model to score each pair.
|
54 |
+
3. Save a new CSV with (question_number, score) per row + a final Total row.
|
55 |
+
4. Return a path to the new CSV for download.
|
56 |
"""
|
57 |
# Read rows from the uploaded CSV
|
|
|
58 |
rows = []
|
59 |
with open(csv_file.name, "r", encoding="utf-8") as f:
|
60 |
reader = csv.DictReader(f)
|
|
|
65 |
total_score = 0
|
66 |
|
67 |
for r in rows:
|
|
|
68 |
question_number = r.get("question_number", "")
|
69 |
question = r.get("question", "")
|
70 |
answer = r.get("answer", "")
|
71 |
|
|
|
72 |
sc = score_qa(question, answer)
|
73 |
total_score += sc
|
74 |
|
|
|
77 |
"score": sc
|
78 |
})
|
79 |
|
80 |
+
# Write results to a new CSV in text mode
|
81 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv", encoding="utf-8") as out_file:
|
82 |
+
fieldnames = ["question_number", "score"]
|
83 |
+
writer = csv.DictWriter(out_file, fieldnames=fieldnames)
|
84 |
+
writer.writeheader()
|
85 |
+
for row in results:
|
86 |
+
writer.writerow(row)
|
87 |
+
# Add a final row showing the total
|
88 |
+
writer.writerow({"question_number": "Total", "score": total_score})
|
89 |
+
out_path = out_file.name
|
|
|
90 |
|
91 |
# Return the path to the CSV so Gradio can serve it as a downloadable file
|
92 |
+
return out_path
|
93 |
|
94 |
demo = gr.Interface(
|
95 |
fn=judge_ecolinguistics_from_csv,
|