Update app.py
app.py CHANGED
@@ -14,6 +14,12 @@ import shutil
 import spaces
 import time
 from langdetect import detect
+import seaborn as sns
+import matplotlib.pyplot as plt
+import pandas as pd
+import io
+import base64
+import re
 
 # Set environment variable to disable tokenizers parallelism warning
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -53,11 +59,9 @@ class LazyPipeline:
             "text-generation",
             model=model,
             tokenizer=tokenizer,
-            max_new_tokens=
+            max_new_tokens=4096,
             do_sample=True,
-            temperature=0.
-            top_p=0.95,
-            top_k=50,
+            temperature=0.01,
         )
         return self.pipeline
 
@@ -150,6 +154,33 @@ def count_words_and_tokens(text):
     tokens = len(AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3").tokenize(text))
     return words, tokens
 
+def extract_values(text, keys):
+    values = {}
+    for key in keys:
+        match = re.search(f"{key}:\s*([-]?\d+(?:\.\d+)?)", text)
+        if match:
+            values[key] = float(match.group(1))
+    return values
+
+def create_bar_chart(data, title):
+    plt.figure(figsize=(10, 6))
+    sns.set_style("whitegrid")
+    chart = sns.barplot(x=list(data.keys()), y=list(data.values()), palette="deep")
+    chart.set_xticklabels(chart.get_xticklabels(), rotation=45, ha="right")
+    plt.title(title)
+    plt.tight_layout()
+
+    # Save the plot to a bytes buffer
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png')
+    buf.seek(0)
+
+    # Encode the bytes buffer to base64
+    img_str = base64.b64encode(buf.getvalue()).decode()
+    plt.close()
+
+    return f"data:image/png;base64,{img_str}"
+
 @spaces.GPU(duration=150)
 def process_input(input_file, progress=gr.Progress()):
     start_time = time.time()
@@ -226,34 +257,56 @@ def create_interface():
         execution_time = gr.Textbox(label="Execution Time", visible=False)
         detected_language = gr.Textbox(label="Detected Language", visible=False)
         input_info = gr.Textbox(label="Input Information", visible=False)
-
-
-
-
-
-
-        bigfive_output = gr.Textbox(
-
-
-
-
-        )
-
-
-
-            lines=10,
-            max_lines=20
-        )
+
+        attachments_output = gr.Textbox(label="Attachments Results", visible=False, lines=10, max_lines=20)
+        attachments_chart = gr.Image(label="Attachments Chart", visible=False)
+        attachments_explanation = gr.Textbox(label="Attachments Explanation", visible=False)
+        attachments_sentence = gr.Textbox(label="Attachments Sentence", visible=False)
+
+        bigfive_output = gr.Textbox(label="Big Five Results", visible=False, lines=10, max_lines=20)
+        bigfive_chart = gr.Image(label="Big Five Chart", visible=False)
+        bigfive_explanation = gr.Textbox(label="Big Five Explanation", visible=False)
+        bigfive_sentence = gr.Textbox(label="Big Five Sentence", visible=False)
+
+        personalities_output = gr.Textbox(label="Personalities Results", visible=False, lines=10, max_lines=20)
+        personalities_chart = gr.Image(label="Personalities Chart", visible=False)
+        personalities_explanation = gr.Textbox(label="Personalities Explanation", visible=False)
+        personalities_sentence = gr.Textbox(label="Personalities Sentence", visible=False)
 
-        def
+        def update_visibility_and_charts(status, exec_time, lang, info, attachments, bigfive, personalities):
+            attachments_data = extract_values(attachments, ["Secured", "Anxious-Preoccupied", "Dismissive-Avoidant", "Fearful-Avoidant"])
+            attachments_chart_img = create_bar_chart(attachments_data, "Attachment Styles")
+
+            bigfive_data = extract_values(bigfive, ["Extraversion", "Agreeableness", "Conscientiousness", "Neuroticism", "Openness"])
+            bigfive_chart_img = create_bar_chart(bigfive_data, "Big Five Traits")
+
+            personalities_data = extract_values(personalities, ["Depressed", "Paranoid", "Schizoid-Schizotypal", "Antisocial-Psychopathic", "Borderline-Dysregulated", "Hysteric-Histrionic", "Narcissistic", "Anxious-Avoidant", "Dependent-Victimized", "Obsessional"])
+            personalities_chart_img = create_bar_chart(personalities_data, "Personality Traits")
+
+            attachments_explanation = re.search(r"Explanation:(.*?)(?=Sentence:|$)", attachments, re.DOTALL)
+            attachments_sentence = re.search(r"Sentence:(.*?)$", attachments, re.DOTALL)
+
+            bigfive_explanation = re.search(r"Explanation:(.*?)(?=Sentence:|$)", bigfive, re.DOTALL)
+            bigfive_sentence = re.search(r"Sentence:(.*?)$", bigfive, re.DOTALL)
+
+            personalities_explanation = re.search(r"Explanation:(.*?)(?=Sentence:|$)", personalities, re.DOTALL)
+            personalities_sentence = re.search(r"Sentence:(.*?)$", personalities, re.DOTALL)
+
             return {
                 progress_text: status,
                 execution_time: gr.update(value=exec_time, visible=True),
                 detected_language: gr.update(value=lang, visible=True),
                 input_info: gr.update(value=info, visible=True),
                 attachments_output: gr.update(value=attachments, visible=True),
+                attachments_chart: gr.update(value=attachments_chart_img, visible=True),
+                attachments_explanation: gr.update(value=attachments_explanation.group(1).strip() if attachments_explanation else "", visible=True),
+                attachments_sentence: gr.update(value=attachments_sentence.group(1).strip() if attachments_sentence else "", visible=True),
                 bigfive_output: gr.update(value=bigfive, visible=True),
-
+                bigfive_chart: gr.update(value=bigfive_chart_img, visible=True),
+                bigfive_explanation: gr.update(value=bigfive_explanation.group(1).strip() if bigfive_explanation else "", visible=True),
+                bigfive_sentence: gr.update(value=bigfive_sentence.group(1).strip() if bigfive_sentence else "", visible=True),
+                personalities_explanation: gr.update(value=personalities_explanation.group(1).strip() if personalities_explanation else "", visible=True),
+                personalities_sentence: gr.update(value=personalities_sentence.group(1).strip() if personalities_sentence else "", visible=True)
             }
 
         input_file.upload(
@@ -261,9 +314,12 @@ def create_interface():
             inputs=[input_file],
             outputs=[progress_text, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output]
         ).then(
-            fn=
+            fn=update_visibility_and_charts,
             inputs=[progress_text, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output],
-            outputs=[progress_text, execution_time, detected_language, input_info,
+            outputs=[progress_text, execution_time, detected_language, input_info,
+                     attachments_output, attachments_chart, attachments_explanation, attachments_sentence,
+                     bigfive_output, bigfive_chart, bigfive_explanation, bigfive_sentence,
+                     personalities_output, personalities_chart, personalities_explanation, personalities_sentence]
         )
 
     return iface
|