neovalle committed on
Commit
aca391b
·
verified ·
1 Parent(s): 8fc19af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -30
app.py CHANGED
@@ -1,46 +1,112 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
 
 
3
 
4
- # Load the Flan-T5-Large model and tokenizer via a pipeline.
5
- # If you're on a GPU Space, device=0 will place it on GPU.
6
- # If you're on CPU only, leave device=-1.
7
- model_name = "google/flan-t5-large"
8
- pipe = pipeline("text2text-generation", model=model_name, device=-1)
9
 
10
- def judge_ecolinguistics(pairs_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  """
12
- Takes a multiline string of question–answer pairs and returns a model-generated
13
- scoring from 1 to 10 for each pair, along with a brief explanation.
14
  """
15
- # Construct a single prompt that instructs the model to score each Q&A pair.
16
  prompt = f"""
17
- You are an ecolinguistics judge. You evaluate answers based on how thoroughly
18
- they address ecological concerns, clarity of expression, and factual correctness.
 
19
 
20
- Below is a set of question–answer pairs:
21
 
22
- {pairs_text}
 
23
 
24
- Please provide, for each pair, a single numerical score from 1 to 10 and a brief explanation.
25
  """
26
- # Use the pipeline to generate a response.
27
- response = pipe(prompt, max_length=512, truncation=True)[0]["generated_text"]
28
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # Build the Gradio interface.
31
  demo = gr.Interface(
32
- fn=judge_ecolinguistics,
33
- inputs=gr.Textbox(
34
- lines=10,
35
- label="Enter Your Question–Answer Pairs",
36
- placeholder="Example:\nQ1: What is an ecological niche?\nA1: It is the role a species plays in its environment.\n\nQ2: How does deforestation affect the climate?\nA2: It can reduce carbon sequestration and disrupt rainfall patterns.\n"
37
- ),
38
- outputs="text",
39
- title="Ecolinguistics Q&A Scorer (Flan-T5-Large)",
40
  description=(
41
- "Paste multiple question–answer pairs. The model will assign a score from 1–10 "
42
- "to each answer, considering ecological relevance and clarity. "
43
- "It will also provide a brief rationale for its scoring."
44
  )
45
  )
46
 
 
import gradio as gr
import csv
import re
import tempfile
import os  # NOTE(review): appears unused in this file — confirm before removing

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# MODEL_NAME can be "google/flan-t5-large", or switch to "google/flan-t5-base" if you need faster/less memory usage.
MODEL_NAME = "google/flan-t5-large"

# Load the tokenizer and model.
# If you're on a GPU Space, device=0 places the model on GPU.
# If CPU only, leave device=-1.
# torch_dtype=None falls back to the checkpoint's default precision on CPU;
# float16 halves memory on GPU.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16 if torch.cuda.is_available() else None)
if torch.cuda.is_available():
    model = model.to("cuda")

# Shared text2text pipeline used by score_qa() below.
# device mirrors the model placement chosen above.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)
27
+
28
+ def score_qa(question, answer):
29
  """
30
+ Prompt Flan-T5 to return just an integer score from 1–10 (no extra text).
31
+ Fallback to 5 if we fail to parse a number.
32
  """
 
33
  prompt = f"""
34
+ You are an ecolinguistics judge. You will be given a question and an answer.
35
+ Please return a single integer score from 1 to 10 based on how well the answer
36
+ addresses ecological concerns, clarity, and factual correctness.
37
 
38
+ Only return the number, nothing else.
39
 
40
+ Question: {question}
41
+ Answer: {answer}
42
 
43
+ Score:
44
  """
45
+ result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]
46
+ # Extract the first integer found in the output
47
+ match = re.search(r"\d+", result)
48
+ if match:
49
+ return int(match.group(0))
50
+ # If no number is found, return a default (e.g. 5)
51
+ return 5
52
+
53
def judge_ecolinguistics_from_csv(csv_file):
    """
    Score every question–answer pair in an uploaded CSV.

    1. Parse the CSV for each question–answer pair.
    2. Use the model (via score_qa) to score each pair.
    3. Save a new CSV with (question_number, score) per row plus a total row.
    4. Return the path to the new CSV for download.

    Parameters
    ----------
    csv_file : file-like or str
        The gr.File upload. Older Gradio versions pass an object with a
        ``.name`` path; newer ones may pass the path string directly —
        both are accepted.

    Returns
    -------
    str
        Filesystem path of the scored CSV (served by Gradio as a download).
    """
    # Accept either a tempfile wrapper (has .name) or a bare path string.
    in_path = getattr(csv_file, "name", csv_file)

    # Read rows from the uploaded CSV.
    # Column headers must match: question_number, question, answer.
    with open(in_path, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    results = []
    total_score = 0

    for r in rows:
        # Extract needed columns; missing columns degrade to empty strings.
        question_number = r.get("question_number", "")
        question = r.get("question", "")
        answer = r.get("answer", "")

        # Score each Q&A with the model.
        sc = score_qa(question, answer)
        total_score += sc

        results.append({
            "question_number": question_number,
            "score": sc
        })

    # Write results to a new CSV in a temporary file so Gradio can return it.
    # BUG FIX: NamedTemporaryFile defaults to binary mode ("w+b"), which made
    # csv.DictWriter raise TypeError when writing str rows. Open in text mode
    # with newline="" (required by the csv module) and explicit UTF-8.
    out_file = tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".csv", newline="", encoding="utf-8"
    )
    try:
        writer = csv.DictWriter(out_file, fieldnames=["question_number", "score"])
        writer.writeheader()
        writer.writerows(results)
        # Add a final row showing the total across all questions.
        writer.writerow({"question_number": "Total", "score": total_score})
    finally:
        out_file.close()

    # Return the path to the CSV so Gradio can serve it as a downloadable file.
    return out_file.name
100
 
 
101
  demo = gr.Interface(
102
+ fn=judge_ecolinguistics_from_csv,
103
+ inputs=gr.File(label="Upload a CSV with question_number, question, answer columns"),
104
+ outputs=gr.File(label="Download the scored CSV"),
105
+ title="Ecolinguistics Q&A Scoring (Flan‑T5‑Large)",
 
 
 
 
106
  description=(
107
+ "Upload a CSV containing columns: question_number, question, and answer. "
108
+ "The model assigns each answer a score (1–10) based on ecolinguistic criteria. "
109
+ "A final row shows the total score across all questions."
110
  )
111
  )
112