Spaces: Build error
Update app.py
app.py CHANGED
@@ -3,105 +3,75 @@ import csv
  import re
  import tempfile
  import os

- …

- …
- model = AutoModelForSeq2SeqLM.from_pretrained(
-     MODEL_NAME,
-     torch_dtype=torch.float16 if torch.cuda.is_available() else None
- )
- if torch.cuda.is_available():
-     model = model.to("cuda")
-
- pipe = pipeline(
-     "text2text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     device=0 if torch.cuda.is_available() else -1
- )

  def score_qa(question, answer):
-     """
-     …
-     ""
-     …
-     match = re.search(r"\d+", result)
-     if match:
-         return int(match.group(0))
-     return 5

  def judge_ecolinguistics_from_csv(csv_file):
-     """
-     …
-     """
-     # Read rows from the uploaded CSV
-     rows = []
-     with open(csv_file.name, "r", encoding="utf-8") as f:
-         reader = csv.DictReader(f)
-         for r in reader:
-             rows.append(r)
-
-     results = []
-     total_score = 0
-
-     for r in rows:
-         question_number = r.get("question_number", "")
-         question = r.get("question", "")
-         answer = r.get("answer", "")
-
-         sc = score_qa(question, answer)
-         total_score += sc
-
-         results.append({
-             "question_number": question_number,
-             "score": sc
-         })
-
-     # Write results to a new CSV in text mode
-     with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv", encoding="utf-8") as out_file:
-         fieldnames = ["question_number", "score"]
-         writer = csv.DictWriter(out_file, fieldnames=fieldnames)
-         writer.writeheader()
-         for row in results:
-             writer.writerow(row)
-         # Add a final row showing the total
-         writer.writerow({"question_number": "Total", "score": total_score})
-         out_path = out_file.name

-     …
-     return out_path

  demo = gr.Interface(
      fn=judge_ecolinguistics_from_csv,
-     inputs=gr.File(label="Upload …
-     outputs=…
      title="Ecolinguistics Q&A Scoring",
      description=(
-         "Upload a CSV …
-         "…
-         "…
      )
  )

  if __name__ == "__main__":
-     demo.launch()
  import re
  import tempfile
  import os
+ import requests

+ # 1. Load system prompt from a file
+ with open("system_instructions.txt", "r", encoding="utf-8") as f:
+     ECO_PROMPT = f.read()

+ # DeepSeek API configuration
+ DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
+ DEEPSEEK_API_URL = "https://api.deepseek.com/v1/chat/completions"  # Verify actual API endpoint

  def score_qa(question, answer):
+     """Query DeepSeek API to get a score for Q&A pair"""
+     try:
+         # Format the prompt using our template
+         prompt = ECO_PROMPT.format(question=question, answer=answer)
+
+         headers = {
+             "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
+             "Content-Type": "application/json"
+         }

+         payload = {
+             "model": "deepseek-chat",  # Verify correct model name
+             "messages": [{
+                 "role": "user",
+                 "content": prompt
+             }],
+             "temperature": 0.1,  # More deterministic output for scoring
+             "max_tokens": 5
+         }

+         response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers)
+         response.raise_for_status()
+
+         # Parse response (adjust based on actual API response structure)
+         output = response.json()['choices'][0]['message']['content']
+
+         # Extract score using same logic as before
+         match = re.search(r"\d+", output)
+         return int(match.group(0)) if match else 1

+     except Exception as e:
+         print(f"API Error: {str(e)}")
+         return 1  # Fallback score on error

  def judge_ecolinguistics_from_csv(csv_file):
+     """Existing CSV processing function remains the same"""
+     # [Keep the existing implementation exactly as you have it]
+     # ... (same file processing logic)
+     # ... (same CSV writing logic)
+     # ... (same percentage calculation)

+     return out_path, percentage_display

+ # [Keep the Gradio interface configuration exactly as is]
  demo = gr.Interface(
      fn=judge_ecolinguistics_from_csv,
+     inputs=gr.File(label="Upload CSV with question_number, question, answer"),
+     outputs=[
+         gr.File(label="Download scored CSV"),
+         gr.HTML(label="Percentage Score")
+     ],
      title="Ecolinguistics Q&A Scoring",
      description=(
+         "Upload a CSV with columns [question_number, question, answer]. "
+         "DeepSeek scores each answer from 0–5, then shows a final "
+         "percentage score. A detailed CSV with individual scores is provided."
      )
  )

  if __name__ == "__main__":
+     demo.launch()
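As committed, the new judge_ecolinguistics_from_csv contains only placeholder comments, yet it must define out_path and percentage_display for the two Gradio outputs. Below is a minimal sketch of a completed version: the CSV reading and writing reuses the logic from the removed lines, while the percentage math and the HTML string are assumptions (each answer is taken to be worth at most 5 points, matching the 0–5 scale in the description).

def judge_ecolinguistics_from_csv(csv_file):
    """Score every row of the uploaded CSV and report an overall percentage."""
    # Read rows from the uploaded CSV (same logic as the removed version;
    # relies on the csv and tempfile imports and score_qa defined above)
    rows = []
    with open(csv_file.name, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for r in reader:
            rows.append(r)

    results = []
    total_score = 0
    for r in rows:
        sc = score_qa(r.get("question", ""), r.get("answer", ""))
        total_score += sc
        results.append({"question_number": r.get("question_number", ""), "score": sc})

    # Write per-question scores plus a total row to a temporary CSV
    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv",
                                     encoding="utf-8", newline="") as out_file:
        writer = csv.DictWriter(out_file, fieldnames=["question_number", "score"])
        writer.writeheader()
        writer.writerows(results)
        writer.writerow({"question_number": "Total", "score": total_score})
        out_path = out_file.name

    # Assumption: each answer is scored out of 5, so the maximum is 5 per row
    max_score = 5 * len(rows) if rows else 1
    percentage = round(total_score / max_score * 100, 1)
    percentage_display = f"<h3>Overall score: {percentage}%</h3>"

    return out_path, percentage_display

One further usage note: because score_qa calls ECO_PROMPT.format(question=..., answer=...), system_instructions.txt must contain literal {question} and {answer} placeholders, and any other braces in the prompt need to be doubled ({{ }}) or str.format will raise a KeyError.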