Spaces:
Build error
Build error
File size: 3,313 Bytes
1cb836d aca391b 1cb836d aca391b 1cb836d aca391b f79fa2a aca391b 8fc19af aca391b 8fc19af aca391b 1cb836d aca391b 8fc19af aca391b 8fc19af aca391b 8fc19af aca391b f79fa2a aca391b f79fa2a aca391b f79fa2a 8fc19af 1cb836d aca391b 8fc19af aca391b 8fc19af 1cb836d 8fc19af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import gradio as gr
import csv
import re
import tempfile
import os
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Hugging Face checkpoint used for scoring.
MODEL_NAME = "google/flan-t5-large"

# Probe for a GPU once; the dtype, model placement and pipeline device all
# follow from this single answer.
_HAS_CUDA = torch.cuda.is_available()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision is requested only when a GPU is present; otherwise the
# library default dtype is used (torch_dtype=None).
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if _HAS_CUDA else None,
)
if _HAS_CUDA:
    model = model.to("cuda")

# Shared text2text pipeline: device 0 is the first GPU, -1 means CPU.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if _HAS_CUDA else -1,
)
def score_qa(question: str, answer: str) -> int:
    """Ask the Flan-T5 pipeline to rate an answer and return an integer score.

    The prompt instructs the model to reply with a single integer from 1 to 10.
    The first run of digits found in the generated text is taken as the score.

    Args:
        question: The question text inserted into the prompt.
        answer: The answer text inserted into the prompt.

    Returns:
        The parsed score, clamped to the range 1..10, or 5 when the generated
        text contains no digits at all.
    """
    # The prompt text is kept exactly as before (leading and trailing newline
    # included) so the model sees the same input.
    prompt = (
        "\n"
        "You are an ecolinguistics judge. You will be given a question and an answer.\n"
        "Please return a single integer score from 1 to 10 based on how well the answer\n"
        "addresses ecological concerns, clarity, and factual correctness.\n"
        "Only return the number, nothing else.\n"
        f"Question: {question}\n"
        f"Answer: {answer}\n"
        "Score:\n"
    )

    # Greedy decoding with a tiny token budget: only a short number is wanted.
    generated = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]

    found = re.search(r"\d+", generated)
    if found is None:
        # Nothing numeric came back; fall back to the neutral mid-scale value.
        return 5

    # The model is not guaranteed to respect the requested range (it may emit
    # "0" or "100"), so clamp to keep per-row scores and the total meaningful.
    return max(1, min(10, int(found.group(0))))
def judge_ecolinguistics_from_csv(csv_file):
    """Score every question-answer pair in an uploaded CSV and write the results.

    The input CSV is read with a header row; the columns ``question_number``,
    ``question`` and ``answer`` are looked up by name, and a missing column is
    treated as an empty string. Each pair is scored with ``score_qa``.

    Args:
        csv_file: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute holds the
            path of the uploaded file.

    Returns:
        The path of a newly created temporary ``.csv`` file with the columns
        ``question_number`` and ``score``: one row per input row, followed by
        a final ``Total`` row holding the sum of all scores. The file is not
        deleted automatically (``delete=False``) so it can be served for
        download.
    """
    # Plain paths are used directly; only wrapper objects are unwrapped via
    # ``.name`` (for a pathlib.Path, ``.name`` would be just the basename).
    if isinstance(csv_file, (str, os.PathLike)):
        csv_path = csv_file
    else:
        csv_path = csv_file.name

    results = []
    total_score = 0

    # "utf-8-sig" transparently drops a leading BOM (common in spreadsheet
    # exports) that would otherwise corrupt the first header name, and
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            # Short rows yield None for absent cells; normalise to "" so the
            # literal text "None" never reaches the prompt or the output.
            question_number = row.get("question_number") or ""
            question = row.get("question") or ""
            answer = row.get("answer") or ""

            sc = score_qa(question, answer)
            total_score += sc
            results.append({"question_number": question_number, "score": sc})

    # Write results to a new CSV in text mode; newline="" avoids doubled
    # carriage returns on platforms that translate "\n" on write.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".csv", encoding="utf-8", newline=""
    ) as out_file:
        writer = csv.DictWriter(out_file, fieldnames=["question_number", "score"])
        writer.writeheader()
        writer.writerows(results)
        # Add a final row showing the total
        writer.writerow({"question_number": "Total", "score": total_score})
        out_path = out_file.name

    # Return the path to the CSV so Gradio can serve it as a downloadable file
    return out_path
# User-facing description shown under the title; the three sentences are
# joined with single spaces.
_DESCRIPTION = " ".join(
    (
        "Upload a CSV containing columns: question_number, question, and answer.",
        "The model assigns each answer a score (1–10) based on ecolinguistic criteria.",
        "A final row shows the total score across all questions.",
    )
)

# One file in, one file out: the uploaded CSV is passed to
# judge_ecolinguistics_from_csv and the path it returns is offered for download.
demo = gr.Interface(
    fn=judge_ecolinguistics_from_csv,
    inputs=gr.File(label="Upload a CSV with question_number, question, answer columns"),
    outputs=gr.File(label="Download the scored CSV"),
    title="Ecolinguistics Q&A Scoring (Flan‑T5‑Large)",
    description=_DESCRIPTION,
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|