Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,46 +1,112 @@
|
|
1 |
import gradio as gr
|
2 |
-
|
|
|
|
|
|
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
# If you're on CPU only, leave device=-1.
|
7 |
-
model_name = "google/flan-t5-large"
|
8 |
-
pipe = pipeline("text2text-generation", model=model_name, device=-1)
|
9 |
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"""
|
12 |
-
|
13 |
-
|
14 |
"""
|
15 |
-
# Construct a single prompt that instructs the model to score each Q&A pair.
|
16 |
prompt = f"""
|
17 |
-
You are an ecolinguistics judge. You
|
18 |
-
|
|
|
19 |
|
20 |
-
|
21 |
|
22 |
-
{
|
|
|
23 |
|
24 |
-
|
25 |
"""
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
# Build the Gradio interface.
|
31 |
demo = gr.Interface(
|
32 |
-
fn=
|
33 |
-
inputs=gr.
|
34 |
-
|
35 |
-
|
36 |
-
placeholder="Example:\nQ1: What is an ecological niche?\nA1: It is the role a species plays in its environment.\n\nQ2: How does deforestation affect the climate?\nA2: It can reduce carbon sequestration and disrupt rainfall patterns.\n"
|
37 |
-
),
|
38 |
-
outputs="text",
|
39 |
-
title="Ecolinguistics Q&A Scorer (Flan-T5-Large)",
|
40 |
description=(
|
41 |
-
"
|
42 |
-
"
|
43 |
-
"
|
44 |
)
|
45 |
)
|
46 |
|
|
|
1 |
import gradio as gr
|
2 |
+
import csv
|
3 |
+
import re
|
4 |
+
import tempfile
|
5 |
+
import os
|
6 |
|
7 |
+
import torch
|
8 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
|
|
|
|
|
|
9 |
|
10 |
+
# MODEL_NAME can be "google/flan-t5-large", or switch to "google/flan-t5-base" if you need faster/less memory usage.
MODEL_NAME = "google/flan-t5-large"

# Load the tokenizer and model.
# If you're on a GPU Space, device=0 places the model on GPU.
# If CPU only, leave device=-1.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Use half precision only when a GPU is present; torch_dtype=None keeps the
# default (float32) dtype on CPU, where float16 ops are poorly supported.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16 if torch.cuda.is_available() else None)
if torch.cuda.is_available():
    model = model.to("cuda")

# Shared text2text pipeline used by score_qa(); device mirrors the model
# placement above (0 = first GPU, -1 = CPU).
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)
|
27 |
+
|
28 |
+
def score_qa(question, answer):
    """
    Prompt Flan-T5 to return just an integer score from 1-10 (no extra text).

    Args:
        question: The question text shown to the judge model.
        answer: The candidate answer to be scored.

    Returns:
        int: A score clamped to the range 1-10. Falls back to 5 if no
        number can be parsed from the model output.
    """
    prompt = f"""
You are an ecolinguistics judge. You will be given a question and an answer.
Please return a single integer score from 1 to 10 based on how well the answer
addresses ecological concerns, clarity, and factual correctness.

Only return the number, nothing else.

Question: {question}
Answer: {answer}

Score:
"""
    # Greedy decoding (do_sample=False) keeps scoring deterministic;
    # max_new_tokens=3 is enough for a one/two-digit number.
    result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]
    # Extract the first integer found in the output.
    match = re.search(r"\d+", result)
    if match:
        # Clamp into the promised 1-10 range: the raw parse could yield 0 or
        # a larger number (e.g. "100" fits in 3 tokens).
        return min(10, max(1, int(match.group(0))))
    # If no number is found, return a neutral default.
    return 5
|
52 |
+
|
53 |
+
def judge_ecolinguistics_from_csv(csv_file):
    """
    Score every question-answer pair in an uploaded CSV.

    1. Parse the CSV for each question-answer pair.
    2. Use the model to score each pair.
    3. Save a new CSV with (question_number, score) per row plus a total row.
    4. Return the path to the new CSV for download.

    Args:
        csv_file: The uploaded file from gr.File — either a file-like object
            with a ``.name`` path attribute or a plain path string
            (Gradio versions differ; both are handled).

    Returns:
        str: Filesystem path of the scored CSV, served by Gradio as a download.
    """
    # Gradio may hand us a tempfile wrapper or a bare path string.
    in_path = getattr(csv_file, "name", csv_file)

    # Read rows from the uploaded CSV.
    # Column names expected: question_number, question, answer.
    with open(in_path, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    results = []
    total_score = 0

    for r in rows:
        # Missing columns degrade to empty strings rather than raising.
        question_number = r.get("question_number", "")
        question = r.get("question", "")
        answer = r.get("answer", "")

        # Score each Q&A with the model.
        sc = score_qa(question, answer)
        total_score += sc

        results.append({
            "question_number": question_number,
            "score": sc
        })

    # Write results to a new CSV in a temporary file so Gradio can return it.
    # mode="w" is required: the default NamedTemporaryFile mode is binary
    # ("w+b"), and csv.DictWriter writes str, which would raise TypeError.
    # newline="" is the csv-module requirement to avoid blank lines on Windows.
    out_file = tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".csv", newline="", encoding="utf-8"
    )
    fieldnames = ["question_number", "score"]
    writer = csv.DictWriter(out_file, fieldnames=fieldnames)
    writer.writeheader()
    for row in results:
        writer.writerow(row)
    # Add a final row showing the total.
    writer.writerow({"question_number": "Total", "score": total_score})
    out_file.close()

    # Return the path to the CSV so Gradio can serve it as a downloadable file.
    return out_file.name
|
100 |
|
|
|
101 |
# Build the Gradio interface: one CSV in, one scored CSV out.
demo = gr.Interface(
    fn=judge_ecolinguistics_from_csv,
    inputs=gr.File(label="Upload a CSV with question_number, question, answer columns"),
    outputs=gr.File(label="Download the scored CSV"),
    title="Ecolinguistics Q&A Scoring (Flan‑T5‑Large)",
    description=(
        "Upload a CSV containing columns: question_number, question, and answer. "
        "The model assigns each answer a score (1–10) based on ecolinguistic criteria. "
        "A final row shows the total score across all questions."
    )
)

# Launch when run directly (required for local runs; harmless on Spaces,
# which imports the module and serves `demo` itself).
if __name__ == "__main__":
    demo.launch()