Spaces:

neovalle
/

H4rmoniousDeepBenchmark

Build error

File size: 3,313 Bytes

1cb836d
aca391b
 
 
 
1cb836d
aca391b
 
1cb836d
aca391b
 
 
f79fa2a
 
 
 
aca391b
 
 
 
 
 
 
 
 
 
 
8fc19af
aca391b
 
8fc19af
 
aca391b
 
 
1cb836d
aca391b
8fc19af
aca391b
 
8fc19af
aca391b
8fc19af
aca391b
 
 
 
 
 
 
 
 
 
f79fa2a
 
aca391b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f79fa2a
 
 
 
 
 
 
 
 
 
aca391b
 
f79fa2a
8fc19af
1cb836d
aca391b
 
 
 
8fc19af
aca391b
 
 
8fc19af
1cb836d
 
 
8fc19af

import gradio as gr
import csv
import re
import tempfile
import os

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Hugging Face Hub id of the checkpoint used as the scoring model.
MODEL_NAME = "google/flan-t5-large"

# Probe for a GPU once and reuse the answer for dtype, placement and pipeline device.
_HAS_CUDA = torch.cuda.is_available()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision is requested only when a GPU is present; otherwise the
# library default dtype is used (torch_dtype=None).
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if _HAS_CUDA else None,
)
if _HAS_CUDA:
    model = model.to("cuda")

# Text-to-text generation pipeline shared by every scoring call:
# device 0 selects the first GPU, -1 selects the CPU.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if _HAS_CUDA else -1,
)

def score_qa(question, answer):
    """
    Ask Flan-T5 to grade one question–answer pair and return an integer score.

    The model is prompted to reply with a single integer from 1 to 10. The
    first run of digits in the generated text is taken as the score.

    Args:
        question: The question text inserted into the prompt.
        answer: The answer text inserted into the prompt.

    Returns:
        An int in the range 1..10. If the generated text contains no digits
        the neutral fallback 5 is returned. A parsed number outside 1..10
        (for example "0" or "12") is clamped to the nearest bound so that a
        single malformed generation cannot skew the total.
    """
    prompt = f"""
You are an ecolinguistics judge. You will be given a question and an answer.
Please return a single integer score from 1 to 10 based on how well the answer
addresses ecological concerns, clarity, and factual correctness.

Only return the number, nothing else.

Question: {question}
Answer: {answer}

Score:
"""
    # Greedy decoding (do_sample=False) keeps the score reproducible for a
    # given input; three new tokens are enough for a one- or two-digit number.
    result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]

    match = re.search(r"\d+", result)
    if match is None:
        return 5

    # The prompt asks for 1-10, but nothing forces the model to comply;
    # clamp so callers can rely on the documented range.
    return max(1, min(10, int(match.group(0))))

def judge_ecolinguistics_from_csv(csv_file):
    """
    Score every question–answer pair in a CSV and write the results to a new CSV.

    1. Parse the input CSV (columns: question_number, question, answer).
    2. Score each pair with ``score_qa``.
    3. Write a new CSV with (question_number, score) per row + a final Total row.
    4. Return the path to the new CSV for download.

    Args:
        csv_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as a ``.name`` attribute, such as an
            uploaded-file wrapper or an open file object.

    Returns:
        Path (str) of a temporary ``.csv`` file holding the scores. The file
        is created with ``delete=False`` so it outlives this call; it is not
        removed here.

    Raises:
        ValueError: If ``csv_file`` is ``None`` (nothing was uploaded).
    """
    if csv_file is None:
        raise ValueError("No CSV file was provided.")

    # Accept a bare path as well as an object that carries its path in .name.
    if isinstance(csv_file, (str, os.PathLike)):
        in_path = os.fspath(csv_file)
    else:
        in_path = csv_file.name

    # "utf-8-sig" strips a leading BOM (common in spreadsheet exports) so the
    # first header is read as "question_number" rather than "\ufeffquestion_number".
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(in_path, "r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f))

    results = []
    total_score = 0

    for r in rows:
        # DictReader fills cells missing from short rows with None; "or ''"
        # keeps the literal text "None" out of the prompt and the output.
        question_number = r.get("question_number") or ""
        question = r.get("question") or ""
        answer = r.get("answer") or ""

        sc = score_qa(question, answer)
        total_score += sc
        results.append({"question_number": question_number, "score": sc})

    # newline="" stops text-mode newline translation from doubling the
    # csv writer's own "\r\n" terminator on platforms that translate "\n".
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".csv", encoding="utf-8", newline=""
    ) as out_file:
        writer = csv.DictWriter(out_file, fieldnames=["question_number", "score"])
        writer.writeheader()
        writer.writerows(results)
        # Add a final row showing the total
        writer.writerow({"question_number": "Total", "score": total_score})
        out_path = out_file.name

    # Return the path to the CSV so Gradio can serve it as a downloadable file
    return out_path

# Explanatory text displayed beneath the title in the web UI.
_DESCRIPTION = (
    "Upload a CSV containing columns: question_number, question, and answer. "
    "The model assigns each answer a score (1–10) based on ecolinguistic criteria. "
    "A final row shows the total score across all questions."
)

# One file goes in (the Q&A sheet), one file comes out (the scored sheet).
_csv_upload = gr.File(label="Upload a CSV with question_number, question, answer columns")
_csv_download = gr.File(label="Download the scored CSV")

# Wire the scoring function to the upload/download components.
demo = gr.Interface(
    fn=judge_ecolinguistics_from_csv,
    inputs=_csv_upload,
    outputs=_csv_download,
    title="Ecolinguistics Q&A Scoring (Flan‑T5‑Large)",
    description=_DESCRIPTION,
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()