# Spaces: Build error
import gradio as gr | |
import csv | |
import re | |
import tempfile | |
import os | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
# Hugging Face checkpoint used for both the tokenizer and the seq2seq model.
MODEL_NAME = "google/flan-t5-large"

# Probe CUDA once and reuse the answer for dtype, placement and pipeline device.
_cuda_ready = torch.cuda.is_available()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision is requested only when a GPU is present; otherwise the
# library default dtype is used (torch_dtype=None).
_weights_dtype = torch.float16 if _cuda_ready else None
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, torch_dtype=_weights_dtype)
if _cuda_ready:
    model = model.to("cuda")

# Pipeline device index: 0 selects the first GPU, -1 selects the CPU.
_pipeline_device = 0 if _cuda_ready else -1
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=_pipeline_device,
)
def score_qa(question, answer):
    """
    Prompt Flan-T5 to grade one question-answer pair and return an integer score.

    The prompt asks for a single integer from 0 to 5. The first run of digits
    in the generated text is taken as the score and clamped into that range,
    because nothing stops the model from emitting an out-of-range value such
    as "10", which would otherwise inflate the total.

    Args:
        question: Question text inserted into the prompt.
        answer: Answer text to be judged.

    Returns:
        An integer between 0 and 5 inclusive. Falls back to 5 when the
        generated text contains no digits.
    """
    min_score, max_score = 0, 5

    # The prompt lines are flush-left so no indentation leaks into the text
    # the model sees.
    prompt = f"""
You are an ecolinguistics judge. You will be given a question and an answer.
Please return a single integer score from 0 to 5 based on how well the answer
addresses ecological concerns, clarity, and factual correctness.
Only return the number, nothing else.
Question: {question}
Answer: {answer}
Score:
"""
    # Sampling is disabled and only a few new tokens are requested: the
    # expected reply is a single short number.
    result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]

    match = re.search(r"\d+", result)
    if match is None:
        # Documented fallback when no number can be parsed.
        return max_score

    # Clamp so a stray "10" or "42" cannot exceed the advertised 0-5 scale.
    return max(min_score, min(max_score, int(match.group(0))))
def judge_ecolinguistics_from_csv(csv_file):
    """
    Score every question-answer pair in an uploaded CSV and write a results CSV.

    1. Parse the CSV for each question-answer pair.
    2. Use the model (via ``score_qa``) to score each pair.
    3. Save a new CSV with (question_number, score) per row + a final Total row.
    4. Return a path to the new CSV for download.

    Args:
        csv_file: The uploaded file, given either as a filesystem path
            (``str`` or ``os.PathLike``) or as an object exposing the path
            through a ``.name`` attribute. The CSV is expected to have
            ``question_number``, ``question`` and ``answer`` columns; missing
            cells are treated as empty strings.

    Returns:
        Path of a temporary ``.csv`` file holding the scores. The file is
        created with ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If ``csv_file`` is ``None`` (nothing was uploaded).
    """
    if csv_file is None:
        raise ValueError("No CSV file was uploaded.")

    # The upload may arrive as a plain path string or as a tempfile-like
    # wrapper with ``.name`` (this varies with the Gradio File ``type``
    # setting), so accept both instead of assuming ``.name`` exists.
    if isinstance(csv_file, (str, os.PathLike)):
        csv_path = csv_file
    else:
        csv_path = csv_file.name

    # "utf-8-sig" drops a leading BOM (common in spreadsheet exports) that
    # would otherwise be glued onto the first header name and make the
    # "question_number" lookup miss. newline="" lets the csv module handle
    # line endings, including newlines embedded in quoted fields.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f))

    results = []
    total_score = 0
    for r in rows:
        # DictReader fills missing cells of short rows with None; normalise
        # to "" so the literal text "None" never reaches the prompt or output.
        question_number = r.get("question_number") or ""
        question = r.get("question") or ""
        answer = r.get("answer") or ""

        sc = score_qa(question, answer)
        total_score += sc
        results.append({
            "question_number": question_number,
            "score": sc,
        })

    # Write results to a new CSV in text mode. delete=False keeps the file on
    # disk after the handle closes so it can be served for download.
    with tempfile.NamedTemporaryFile(
        mode="w",
        delete=False,
        suffix=".csv",
        encoding="utf-8",
        newline="",
    ) as out_file:
        writer = csv.DictWriter(out_file, fieldnames=["question_number", "score"])
        writer.writeheader()
        writer.writerows(results)
        # Add a final row showing the total.
        writer.writerow({"question_number": "Total", "score": total_score})
        out_path = out_file.name

    # Return the path to the CSV so Gradio can serve it as a downloadable file.
    return out_path
# Build the UI pieces first, then assemble the interface from named parts.
_upload_box = gr.File(label="Upload a CSV with question_number, question, answer columns")
_download_box = gr.File(label="Download the scored CSV")

# Adjacent literals are concatenated by the parser into one description string.
_blurb = (
    "Upload a CSV containing columns: question_number, question, and answer. "
    "The model assigns each answer a score (0-5) based on ecolinguistic criteria. "
    "A final row shows the total score across all questions."
)

# Single-function app: one uploaded CSV in, one scored CSV out.
demo = gr.Interface(
    fn=judge_ecolinguistics_from_csv,
    inputs=_upload_box,
    outputs=_download_box,
    title="Ecolinguistics Q&A Scoring",
    description=_blurb,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()