reab5555 committed on
Commit
8fa4560
·
verified ·
1 Parent(s): 2b8dfca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -26
app.py CHANGED
@@ -14,6 +14,12 @@ import shutil
14
  import spaces
15
  import time
16
  from langdetect import detect
 
 
 
 
 
 
17
 
18
  # Set environment variable to disable tokenizers parallelism warning
19
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -53,11 +59,9 @@ class LazyPipeline:
53
  "text-generation",
54
  model=model,
55
  tokenizer=tokenizer,
56
- max_new_tokens=10000,
57
  do_sample=True,
58
- temperature=0.1,
59
- top_p=0.95,
60
- top_k=50,
61
  )
62
  return self.pipeline
63
 
@@ -150,6 +154,33 @@ def count_words_and_tokens(text):
150
  tokens = len(AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3").tokenize(text))
151
  return words, tokens
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  @spaces.GPU(duration=150)
154
  def process_input(input_file, progress=gr.Progress()):
155
  start_time = time.time()
@@ -226,34 +257,56 @@ def create_interface():
226
  execution_time = gr.Textbox(label="Execution Time", visible=False)
227
  detected_language = gr.Textbox(label="Detected Language", visible=False)
228
  input_info = gr.Textbox(label="Input Information", visible=False)
229
- attachments_output = gr.Textbox(
230
- label="Attachments Results",
231
- visible=False,
232
- lines=10,
233
- max_lines=20
234
- )
235
- bigfive_output = gr.Textbox(
236
- label="Big Five Results",
237
- visible=False,
238
- lines=10,
239
- max_lines=20
240
- )
241
- personalities_output = gr.Textbox(
242
- label="Personalities Results",
243
- visible=False,
244
- lines=10,
245
- max_lines=20
246
- )
247
 
248
- def update_visibility(status, exec_time, lang, info, attachments, bigfive, personalities):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  return {
250
  progress_text: status,
251
  execution_time: gr.update(value=exec_time, visible=True),
252
  detected_language: gr.update(value=lang, visible=True),
253
  input_info: gr.update(value=info, visible=True),
254
  attachments_output: gr.update(value=attachments, visible=True),
 
 
 
255
  bigfive_output: gr.update(value=bigfive, visible=True),
256
- personalities_output: gr.update(value=personalities, visible=True)
 
 
 
 
257
  }
258
 
259
  input_file.upload(
@@ -261,9 +314,12 @@ def create_interface():
261
  inputs=[input_file],
262
  outputs=[progress_text, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output]
263
  ).then(
264
- fn=update_visibility,
265
  inputs=[progress_text, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output],
266
- outputs=[progress_text, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output]
 
 
 
267
  )
268
 
269
  return iface
 
14
  import spaces
15
  import time
16
  from langdetect import detect
17
+ import seaborn as sns
18
+ import matplotlib.pyplot as plt
19
+ import pandas as pd
20
+ import io
21
+ import base64
22
+ import re
23
 
24
  # Set environment variable to disable tokenizers parallelism warning
25
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
59
  "text-generation",
60
  model=model,
61
  tokenizer=tokenizer,
62
+ max_new_tokens=4096,
63
  do_sample=True,
64
+ temperature=0.01,
 
 
65
  )
66
  return self.pipeline
67
 
 
154
  tokens = len(AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3").tokenize(text))
155
  return words, tokens
156
 
157
def extract_values(text, keys):
    """Extract ``<key>: <number>`` pairs from free-form text.

    For each entry in ``keys`` the text is searched for the key followed by a
    colon, optional whitespace and a (possibly negative, possibly decimal)
    number. Only the first occurrence of each key is used.

    Args:
        text: The text to scan. ``None`` or an empty string yields ``{}``.
        keys: Iterable of label strings to look for. Labels are matched
            literally, so characters such as ``+``, ``(`` or ``.`` carry no
            regex meaning.

    Returns:
        A dict mapping each key that was found to its value as a ``float``.
        Keys that do not appear in ``text`` are omitted.
    """
    values = {}
    if not text:
        return values
    for key in keys:
        # Raw f-string keeps \s and \d as regex escapes; re.escape makes the
        # key a literal so metacharacters in a label cannot alter the pattern.
        match = re.search(rf"{re.escape(key)}:\s*(-?\d+(?:\.\d+)?)", text)
        if match:
            values[key] = float(match.group(1))
    return values
164
+
165
def create_bar_chart(data, title):
    """Render a bar chart of ``data`` and return it as a PNG data URI.

    Args:
        data: Mapping of category label -> numeric value. One bar is drawn
            per entry, in the mapping's iteration order. An empty mapping
            produces an empty, titled chart instead of calling seaborn with
            no data.
        title: Title drawn above the chart.

    Returns:
        A string of the form ``"data:image/png;base64,<payload>"`` holding
        the rendered PNG.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.set_style("whitegrid")
        if data:
            chart = sns.barplot(x=list(data.keys()), y=list(data.values()), palette="deep")
            # Long category names overlap when horizontal, so tilt them.
            chart.set_xticklabels(chart.get_xticklabels(), rotation=45, ha="right")
        plt.title(title)
        plt.tight_layout()

        # Render into memory; nothing is written to disk.
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        img_str = base64.b64encode(buf.getvalue()).decode()
    finally:
        # Close this specific figure even when plotting or saving fails, so
        # pyplot does not keep accumulating open figures across requests.
        plt.close(fig)

    return f"data:image/png;base64,{img_str}"
183
+
184
  @spaces.GPU(duration=150)
185
  def process_input(input_file, progress=gr.Progress()):
186
  start_time = time.time()
 
257
  execution_time = gr.Textbox(label="Execution Time", visible=False)
258
  detected_language = gr.Textbox(label="Detected Language", visible=False)
259
  input_info = gr.Textbox(label="Input Information", visible=False)
260
+
261
+ attachments_output = gr.Textbox(label="Attachments Results", visible=False, lines=10, max_lines=20)
262
+ attachments_chart = gr.Image(label="Attachments Chart", visible=False)
263
+ attachments_explanation = gr.Textbox(label="Attachments Explanation", visible=False)
264
+ attachments_sentence = gr.Textbox(label="Attachments Sentence", visible=False)
265
+
266
+ bigfive_output = gr.Textbox(label="Big Five Results", visible=False, lines=10, max_lines=20)
267
+ bigfive_chart = gr.Image(label="Big Five Chart", visible=False)
268
+ bigfive_explanation = gr.Textbox(label="Big Five Explanation", visible=False)
269
+ bigfive_sentence = gr.Textbox(label="Big Five Sentence", visible=False)
270
+
271
+ personalities_output = gr.Textbox(label="Personalities Results", visible=False, lines=10, max_lines=20)
272
+ personalities_chart = gr.Image(label="Personalities Chart", visible=False)
273
+ personalities_explanation = gr.Textbox(label="Personalities Explanation", visible=False)
274
+ personalities_sentence = gr.Textbox(label="Personalities Sentence", visible=False)
 
 
 
275
 
276
def update_visibility_and_charts(status, exec_time, lang, info, attachments, bigfive, personalities):
    """Build the Gradio updates that reveal results, charts and text excerpts.

    Parses the three result texts for numeric scores (rendered as bar
    charts) and for their ``Explanation:`` / ``Sentence:`` sections, then
    returns one update per output component.

    Args:
        status: Value forwarded unchanged to ``progress_text``.
        exec_time: Value shown in ``execution_time``.
        lang: Value shown in ``detected_language``.
        info: Value shown in ``input_info``.
        attachments: Attachment-style result text.
        bigfive: Big Five result text.
        personalities: Personality result text.

    Returns:
        A dict keyed by the output components of the enclosing scope, each
        mapped to its new value or ``gr.update(...)``.
    """

    def _section(pattern, text):
        # Return the stripped first capture group, or "" when absent.
        found = re.search(pattern, text or "", re.DOTALL)
        return found.group(1).strip() if found else ""

    explanation_pattern = r"Explanation:(.*?)(?=Sentence:|$)"
    sentence_pattern = r"Sentence:(.*?)$"

    attachments_data = extract_values(attachments or "", ["Secured", "Anxious-Preoccupied", "Dismissive-Avoidant", "Fearful-Avoidant"])
    attachments_chart_img = create_bar_chart(attachments_data, "Attachment Styles")

    bigfive_data = extract_values(bigfive or "", ["Extraversion", "Agreeableness", "Conscientiousness", "Neuroticism", "Openness"])
    bigfive_chart_img = create_bar_chart(bigfive_data, "Big Five Traits")

    personalities_data = extract_values(personalities or "", ["Depressed", "Paranoid", "Schizoid-Schizotypal", "Antisocial-Psychopathic", "Borderline-Dysregulated", "Hysteric-Histrionic", "Narcissistic", "Anxious-Avoidant", "Dependent-Victimized", "Obsessional"])
    personalities_chart_img = create_bar_chart(personalities_data, "Personality Traits")

    # NOTE: these locals must NOT reuse the component names
    # (attachments_explanation, bigfive_sentence, ...). Assigning to those
    # names here would shadow the components from the enclosing scope and the
    # dict below would end up keyed by match objects instead of components.
    attachments_explanation_text = _section(explanation_pattern, attachments)
    attachments_sentence_text = _section(sentence_pattern, attachments)

    bigfive_explanation_text = _section(explanation_pattern, bigfive)
    bigfive_sentence_text = _section(sentence_pattern, bigfive)

    personalities_explanation_text = _section(explanation_pattern, personalities)
    personalities_sentence_text = _section(sentence_pattern, personalities)

    # Every component listed in the event's `outputs` gets an entry, including
    # personalities_output and personalities_chart, so none stays hidden.
    return {
        progress_text: status,
        execution_time: gr.update(value=exec_time, visible=True),
        detected_language: gr.update(value=lang, visible=True),
        input_info: gr.update(value=info, visible=True),
        attachments_output: gr.update(value=attachments, visible=True),
        attachments_chart: gr.update(value=attachments_chart_img, visible=True),
        attachments_explanation: gr.update(value=attachments_explanation_text, visible=True),
        attachments_sentence: gr.update(value=attachments_sentence_text, visible=True),
        bigfive_output: gr.update(value=bigfive, visible=True),
        bigfive_chart: gr.update(value=bigfive_chart_img, visible=True),
        bigfive_explanation: gr.update(value=bigfive_explanation_text, visible=True),
        bigfive_sentence: gr.update(value=bigfive_sentence_text, visible=True),
        personalities_output: gr.update(value=personalities, visible=True),
        personalities_chart: gr.update(value=personalities_chart_img, visible=True),
        personalities_explanation: gr.update(value=personalities_explanation_text, visible=True),
        personalities_sentence: gr.update(value=personalities_sentence_text, visible=True),
    }
311
 
312
  input_file.upload(
 
314
  inputs=[input_file],
315
  outputs=[progress_text, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output]
316
  ).then(
317
+ fn=update_visibility_and_charts,
318
  inputs=[progress_text, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output],
319
+ outputs=[progress_text, execution_time, detected_language, input_info,
320
+ attachments_output, attachments_chart, attachments_explanation, attachments_sentence,
321
+ bigfive_output, bigfive_chart, bigfive_explanation, bigfive_sentence,
322
+ personalities_output, personalities_chart, personalities_explanation, personalities_sentence]
323
  )
324
 
325
  return iface