neovalle committed on
Commit
57f5984
·
verified ·
1 Parent(s): b0d9dad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -83
app.py CHANGED
@@ -3,105 +3,75 @@ import csv
3
  import re
4
  import tempfile
5
  import os
 
6
 
7
- import torch
8
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
9
 
10
- MODEL_NAME = "google/flan-t5-large"
11
-
12
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
- model = AutoModelForSeq2SeqLM.from_pretrained(
14
- MODEL_NAME,
15
- torch_dtype=torch.float16 if torch.cuda.is_available() else None
16
- )
17
- if torch.cuda.is_available():
18
- model = model.to("cuda")
19
-
20
- pipe = pipeline(
21
- "text2text-generation",
22
- model=model,
23
- tokenizer=tokenizer,
24
- device=0 if torch.cuda.is_available() else -1
25
- )
26
 
27
  def score_qa(question, answer):
28
- """
29
- Prompt Flan-T5 to return just an integer score.
30
- Fallback to 5 if we fail to parse a number.
31
- """
32
- prompt = f"""
33
- You are an ecolinguistics judge. You will be given a question and an answer.
34
- Please return a single integer score from 0 to 5 based on how well the answer
35
- addresses ecological concerns, clarity, and factual correctness.
 
36
 
37
- Only return the number, nothing else.
 
 
 
 
 
 
 
 
38
 
39
- Question: {question}
40
- Answer: {answer}
 
 
 
 
 
 
 
41
 
42
- Score:
43
- """
44
- result = pipe(prompt, max_new_tokens=3, do_sample=False)[0]["generated_text"]
45
- match = re.search(r"\d+", result)
46
- if match:
47
- return int(match.group(0))
48
- return 5
49
 
50
  def judge_ecolinguistics_from_csv(csv_file):
51
- """
52
- 1. Parse the CSV for each question–answer pair.
53
- 2. Use the model to score each pair.
54
- 3. Save a new CSV with (question_number, score) per row + a final Total row.
55
- 4. Return a path to the new CSV for download.
56
- """
57
- # Read rows from the uploaded CSV
58
- rows = []
59
- with open(csv_file.name, "r", encoding="utf-8") as f:
60
- reader = csv.DictReader(f)
61
- for r in reader:
62
- rows.append(r)
63
-
64
- results = []
65
- total_score = 0
66
-
67
- for r in rows:
68
- question_number = r.get("question_number", "")
69
- question = r.get("question", "")
70
- answer = r.get("answer", "")
71
-
72
- sc = score_qa(question, answer)
73
- total_score += sc
74
-
75
- results.append({
76
- "question_number": question_number,
77
- "score": sc
78
- })
79
-
80
- # Write results to a new CSV in text mode
81
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv", encoding="utf-8") as out_file:
82
- fieldnames = ["question_number", "score"]
83
- writer = csv.DictWriter(out_file, fieldnames=fieldnames)
84
- writer.writeheader()
85
- for row in results:
86
- writer.writerow(row)
87
- # Add a final row showing the total
88
- writer.writerow({"question_number": "Total", "score": total_score})
89
- out_path = out_file.name
90
 
91
- # Return the path to the CSV so Gradio can serve it as a downloadable file
92
- return out_path
93
 
 
94
  demo = gr.Interface(
95
  fn=judge_ecolinguistics_from_csv,
96
- inputs=gr.File(label="Upload a CSV with question_number, question, answer columns"),
97
- outputs=gr.File(label="Download the scored CSV"),
 
 
 
98
  title="Ecolinguistics Q&A Scoring",
99
  description=(
100
- "Upload a CSV containing columns: question_number, question, and answer. "
101
- "The model assigns each answer a score (0-5) based on ecolinguistic criteria. "
102
- "A final row shows the total score across all questions."
103
  )
104
  )
105
 
106
  if __name__ == "__main__":
107
- demo.launch()
 
3
  import re
4
  import tempfile
5
  import os
6
+ import requests
7
 
8
+ # 1. Load system prompt from a file
9
+ with open("system_instructions.txt", "r", encoding="utf-8") as f:
10
+ ECO_PROMPT = f.read()
11
 
12
+ # DeepSeek API configuration
13
+ DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
14
+ DEEPSEEK_API_URL = "https://api.deepseek.com/v1/chat/completions" # Verify actual API endpoint
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def score_qa(question, answer):
17
+ """Query DeepSeek API to get a score for Q&A pair"""
18
+ try:
19
+ # Format the prompt using our template
20
+ prompt = ECO_PROMPT.format(question=question, answer=answer)
21
+
22
+ headers = {
23
+ "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
24
+ "Content-Type": "application/json"
25
+ }
26
 
27
+ payload = {
28
+ "model": "deepseek-chat", # Verify correct model name
29
+ "messages": [{
30
+ "role": "user",
31
+ "content": prompt
32
+ }],
33
+ "temperature": 0.1, # More deterministic output for scoring
34
+ "max_tokens": 5
35
+ }
36
 
37
+ response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers)
38
+ response.raise_for_status()
39
+
40
+ # Parse response (adjust based on actual API response structure)
41
+ output = response.json()['choices'][0]['message']['content']
42
+
43
+ # Extract score using same logic as before
44
+ match = re.search(r"\d+", output)
45
+ return int(match.group(0)) if match else 1
46
 
47
+ except Exception as e:
48
+ print(f"API Error: {str(e)}")
49
+ return 1 # Fallback score on error
 
 
 
 
50
 
51
  def judge_ecolinguistics_from_csv(csv_file):
52
+ """Existing CSV processing function remains the same"""
53
+ # [Keep the existing implementation exactly as you have it]
54
+ # ... (same file processing logic)
55
+ # ... (same CSV writing logic)
56
+ # ... (same percentage calculation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ return out_path, percentage_display
 
59
 
60
+ # [Keep the Gradio interface configuration exactly as is]
61
  demo = gr.Interface(
62
  fn=judge_ecolinguistics_from_csv,
63
+ inputs=gr.File(label="Upload CSV with question_number, question, answer"),
64
+ outputs=[
65
+ gr.File(label="Download scored CSV"),
66
+ gr.HTML(label="Percentage Score")
67
+ ],
68
  title="Ecolinguistics Q&A Scoring",
69
  description=(
70
+ "Upload a CSV with columns [question_number, question, answer]. "
71
+ "DeepSeek scores each answer from 0-5, then shows a final "
72
+ "percentage score. A detailed CSV with individual scores is provided."
73
  )
74
  )
75
 
76
  if __name__ == "__main__":
77
+ demo.launch()