neovalle's picture
Update app.py
a7ddeb5 verified
raw
history blame
3.26 kB
import gradio as gr
import csv
import re
import tempfile
import os
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Hugging Face checkpoint used as the scoring model.
MODEL_NAME = "google/flan-t5-large"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Request half precision only when CUDA is available; otherwise pass None so
# the library picks its default dtype.
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=None if not torch.cuda.is_available() else torch.float16,
)

# Move the weights onto the GPU when one is present.
if torch.cuda.is_available():
    model = model.to("cuda")

# Text-to-text generation pipeline shared by the scoring code:
# device 0 is the first CUDA device, -1 selects the CPU.
pipe = pipeline(
    "text2text-generation",
    tokenizer=tokenizer,
    model=model,
    device=-1 if not torch.cuda.is_available() else 0,
)
def score_qa(question, answer, generator=None):
    """
    Prompt the text2text model to return just an integer score for one
    question/answer pair.

    Args:
        question: Question text inserted into the prompt.
        answer: Answer text inserted into the prompt.
        generator: Optional callable used instead of the module-level ``pipe``.
            It is invoked as
            ``generator(prompt, max_new_tokens=3, do_sample=False)`` and must
            return a sequence whose first item is a mapping containing a
            ``"generated_text"`` entry. Defaults to ``pipe``.

    Returns:
        int: The first run of digits found in the generated text, capped at 5
        so the result always stays within the 0-5 range the prompt asks for.
        Falls back to 5 if no digits can be parsed.
    """
    if generator is None:
        # Resolved at call time so the default model is only needed when the
        # caller does not supply its own generator.
        generator = pipe

    prompt = (
        "\n"
        "You are an ecolinguistics judge. You will be given a question and an answer.\n"
        "Please return a single integer score from 0 to 5 based on how well the answer\n"
        "addresses ecological concerns, clarity, and factual correctness.\n"
        "Only return the number, nothing else.\n"
        f"Question: {question}\n"
        f"Answer: {answer}\n"
        "Score:\n"
    )

    # Greedy decoding with a tiny token budget: only a short number is wanted.
    generated = generator(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]

    found = re.search(r"\d+", generated)
    if found is None:
        # NOTE(review): unparseable output is awarded the maximum score; this
        # keeps the previously documented fallback.
        return 5

    # The model is not guaranteed to obey the requested range (e.g. "10"),
    # so cap the value; the digit-only pattern can never yield a negative.
    return min(int(found.group(0)), 5)
def judge_ecolinguistics_from_csv(csv_file):
    """
    Score every question-answer pair in an uploaded CSV and write the results.

    1. Parse the CSV for each question-answer pair.
    2. Use the model to score each pair.
    3. Save a new CSV with (question_number, score) per row + a final Total row.
    4. Return a path to the new CSV for download.

    Args:
        csv_file: The uploaded file, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds the path. The CSV is read by header name using
            the columns ``question_number``, ``question`` and ``answer``;
            missing columns or cells are treated as empty strings.

    Returns:
        str: Path of a temporary ``.csv`` file containing the scores. The file
        is created with ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If no file was provided.
    """
    if csv_file is None:
        raise gr.Error("Please upload a CSV file before submitting.")

    # A plain path is used as-is; anything else is expected to expose the
    # path through ``.name``. (``getattr`` is avoided on purpose: a
    # ``pathlib.Path`` has a ``name`` attribute that is only the basename.)
    if isinstance(csv_file, (str, os.PathLike)):
        source_path = csv_file
    else:
        source_path = csv_file.name

    # "utf-8-sig" drops a leading byte-order mark (common in spreadsheet
    # exports) that would otherwise corrupt the first header name, and still
    # reads BOM-less UTF-8. newline="" lets the csv module handle line
    # endings itself, including newlines embedded in quoted fields.
    with open(source_path, "r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f))

    results = []
    total_score = 0
    for r in rows:
        # DictReader fills cells missing from a short row with None; use ""
        # so the literal text "None" never reaches the prompt or the output.
        sc = score_qa(r.get("question") or "", r.get("answer") or "")
        total_score += sc
        results.append({
            "question_number": r.get("question_number") or "",
            "score": sc,
        })

    # Write results to a new CSV in text mode. newline="" prevents the
    # platform newline translation from doubling the writer's "\r\n".
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".csv", encoding="utf-8", newline=""
    ) as out_file:
        writer = csv.DictWriter(out_file, fieldnames=["question_number", "score"])
        writer.writeheader()
        writer.writerows(results)
        # Add a final row showing the total
        writer.writerow({"question_number": "Total", "score": total_score})
        out_path = out_file.name

    # Return the path to the CSV so Gradio can serve it as a downloadable file
    return out_path
# Gradio front end: a single CSV upload goes in, the scored CSV comes back
# as a downloadable file.
demo = gr.Interface(
    title="Ecolinguistics Q&A Scoring",
    description=" ".join((
        "Upload a CSV containing columns: question_number, question, and answer.",
        "The model assigns each answer a score (0-5) based on ecolinguistic criteria.",
        "A final row shows the total score across all questions.",
    )),
    fn=judge_ecolinguistics_from_csv,
    inputs=gr.File(label="Upload a CSV with question_number, question, answer columns"),
    outputs=gr.File(label="Download the scored CSV"),
)

# Launch the app only when this file is executed directly as a script.
if __name__ == "__main__":
    demo.launch()