reab5555 committed on
Commit
a3e8bd9
·
verified ·
1 Parent(s): dfbf2d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -28
app.py CHANGED
@@ -53,9 +53,9 @@ class LazyPipeline:
53
  "text-generation",
54
  model=model,
55
  tokenizer=tokenizer,
56
- max_length = 4000,
57
  max_new_tokens=512,
58
- temperature=0.1,
59
  )
60
  return self.pipeline
61
 
@@ -113,7 +113,7 @@ class LazyChains:
113
 
114
  def create_prompt(self, task):
115
  return PromptTemplate(
116
- template=task + "\n\nContext: {context}\n\nTask: {question}\n\n---------------------------\n\nAnswer: ",
117
  input_variables=["context", "question"]
118
  )
119
 
@@ -147,32 +147,38 @@ lazy_chains = LazyChains(lazy_llm)
147
 
148
  def count_words_and_tokens(text):
149
  words = len(text.split())
150
- tokens = len(AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct").tokenize(text))
151
  return words, tokens
152
 
153
  @spaces.GPU(duration=150)
154
  def process_input(input_file):
155
  start_time = time.time()
156
 
 
 
 
157
  file_extension = os.path.splitext(input_file.name)[1].lower()
158
 
159
  if file_extension == '.txt':
160
  with open(input_file.name, 'r', encoding='utf-8') as file:
161
  content = file.read()
162
  words, tokens = count_words_and_tokens(content)
163
- input_info = f"Text file loaded. Words: {words}, Tokens: {tokens}"
164
  video_path = None
165
  elif file_extension == '.pdf':
166
  loader = PyPDFLoader(input_file.name)
167
  pages = loader.load_and_split()
168
  content = '\n'.join([page.page_content for page in pages])
169
  words, tokens = count_words_and_tokens(content)
170
- input_info = f"PDF file loaded. Words: {words}, Tokens: {tokens}"
171
  video_path = None
172
  elif file_extension in ['.mp4', '.avi', '.mov']:
173
  temp_video_path = "temp_video" + file_extension
174
  shutil.copy2(input_file.name, temp_video_path)
175
 
 
 
 
176
  language = "en" # Default to English for video files
177
  diarization.process_video(temp_video_path, hf_token, language)
178
 
@@ -183,43 +189,61 @@ def process_input(input_file):
183
  input_info = f"Video transcribed. Words: {words}, Tokens: {tokens}"
184
  video_path = temp_video_path
185
  else:
186
- return "Unsupported file format. Please upload a TXT, PDF, or video file.", None, None, None, None, None
187
 
188
  detected_language = detect_language(content)
189
 
 
 
 
190
  attachments_chain, bigfive_chain, personalities_chain = lazy_chains.get_chains()
191
 
192
  attachments_result = attachments_chain({"query": content})
193
- attachments_answer = attachments_result['result'].split("---------------------------\n\nAnswer:")[-1].strip()
194
 
195
  bigfive_result = bigfive_chain({"query": content})
196
- bigfive_answer = bigfive_result['result'].split("---------------------------\n\nAnswer:")[-1].strip()
197
 
198
  personalities_result = personalities_chain({"query": content})
199
- personalities_answer = personalities_result['result'].split("---------------------------\n\nAnswer:")[-1].strip()
200
 
201
  end_time = time.time()
202
  execution_time = end_time - start_time
203
 
204
- execution_info = f"Execution Time: {execution_time:.2f} seconds\nFile Type: {file_extension}\nDetected Language: {detected_language}"
205
 
206
- return execution_info, input_info, attachments_answer, bigfive_answer, personalities_answer, video_path
207
-
208
- iface = gr.Interface(
209
- fn=process_input,
210
- inputs=gr.File(label="Upload File (TXT, PDF, or Video)"),
211
- outputs=[
212
- gr.Textbox(label="Execution Information"),
213
- gr.Textbox(label="Input Information"),
214
- gr.Textbox(label="Attachments Results"),
215
- gr.Textbox(label="Big Five Results"),
216
- gr.Textbox(label="Personalities Results"),
217
- gr.Video(label="Input Video")
218
- ],
219
- title="Personality Analysis Classification",
220
- description="Upload a Video, TXT, or PDF file.",
221
- allow_flagging="never"
222
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  # Launch the app
225
  iface.launch()
 
53
  "text-generation",
54
  model=model,
55
  tokenizer=tokenizer,
56
+ max_length = 2000,
57
  max_new_tokens=512,
58
+ temperature=0.8,
59
  )
60
  return self.pipeline
61
 
 
113
 
114
  def create_prompt(self, task):
115
  return PromptTemplate(
116
+ template=task + "\n\nContext: {context}\n\nTask: {question}\n\n-----------\n\nAnswer: ",
117
  input_variables=["context", "question"]
118
  )
119
 
 
147
 
148
  def count_words_and_tokens(text):
149
  words = len(text.split())
150
+ tokens = len(AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3").tokenize(text))
151
  return words, tokens
152
 
153
  @spaces.GPU(duration=150)
154
  def process_input(input_file):
155
  start_time = time.time()
156
 
157
+ progress_info = "Processing file..."
158
+ yield progress_info, None, None, None, None, None, None, None, None
159
+
160
  file_extension = os.path.splitext(input_file.name)[1].lower()
161
 
162
  if file_extension == '.txt':
163
  with open(input_file.name, 'r', encoding='utf-8') as file:
164
  content = file.read()
165
  words, tokens = count_words_and_tokens(content)
166
+ input_info = f"Text file processed. Words: {words}, Tokens: {tokens}"
167
  video_path = None
168
  elif file_extension == '.pdf':
169
  loader = PyPDFLoader(input_file.name)
170
  pages = loader.load_and_split()
171
  content = '\n'.join([page.page_content for page in pages])
172
  words, tokens = count_words_and_tokens(content)
173
+ input_info = f"PDF file processed. Words: {words}, Tokens: {tokens}"
174
  video_path = None
175
  elif file_extension in ['.mp4', '.avi', '.mov']:
176
  temp_video_path = "temp_video" + file_extension
177
  shutil.copy2(input_file.name, temp_video_path)
178
 
179
+ progress_info = "Transcribing video..."
180
+ yield progress_info, None, None, None, None, None, None, None, temp_video_path
181
+
182
  language = "en" # Default to English for video files
183
  diarization.process_video(temp_video_path, hf_token, language)
184
 
 
189
  input_info = f"Video transcribed. Words: {words}, Tokens: {tokens}"
190
  video_path = temp_video_path
191
  else:
192
+ return "Unsupported file format. Please upload a TXT, PDF, or video file.", None, None, None, None, None, None, None, None
193
 
194
  detected_language = detect_language(content)
195
 
196
+ progress_info = "Analyzing content..."
197
+ yield progress_info, None, detected_language, input_info, None, None, None, None, video_path
198
+
199
  attachments_chain, bigfive_chain, personalities_chain = lazy_chains.get_chains()
200
 
201
  attachments_result = attachments_chain({"query": content})
202
+ attachments_answer = attachments_result['result'].split("-----------\n\nAnswer:")[-1].strip()
203
 
204
  bigfive_result = bigfive_chain({"query": content})
205
+ bigfive_answer = bigfive_result['result'].split("-----------\n\nAnswer:")[-1].strip()
206
 
207
  personalities_result = personalities_chain({"query": content})
208
+ personalities_answer = personalities_result['result'].split("-----------\n\nAnswer:")[-1].strip()
209
 
210
  end_time = time.time()
211
  execution_time = end_time - start_time
212
 
213
+ execution_info = f"{execution_time:.2f} seconds"
214
 
215
+ progress_info = "Analysis complete!"
216
+
217
+ yield progress_info, execution_info, detected_language, input_info, attachments_answer, bigfive_answer, personalities_answer, video_path
218
+
219
+ def create_interface():
220
+ with gr.Blocks() as iface:
221
+ gr.Markdown("# Personality Analysis Classification")
222
+ gr.Markdown("Upload a Video, TXT, or PDF file.")
223
+
224
+ with gr.Row():
225
+ input_file = gr.File(label="Upload File (TXT, PDF, or Video)")
226
+
227
+ with gr.Column():
228
+ progress = gr.Textbox(label="Progress")
229
+ execution_time = gr.Textbox(label="Execution Time", visible=False)
230
+ detected_language = gr.Textbox(label="Detected Language", visible=False)
231
+ input_info = gr.Textbox(label="Input Information", visible=False)
232
+ video_output = gr.Video(label="Input Video", visible=False)
233
+ attachments_output = gr.Textbox(label="Attachments Results", visible=False)
234
+ bigfive_output = gr.Textbox(label="Big Five Results", visible=False)
235
+ personalities_output = gr.Textbox(label="Personalities Results", visible=False)
236
+
237
+ input_file.upload(
238
+ fn=process_input,
239
+ inputs=[input_file],
240
+ outputs=[progress, execution_time, detected_language, input_info, attachments_output, bigfive_output, personalities_output, video_output],
241
+ show_progress=True
242
+ )
243
+
244
+ return iface
245
+
246
+ iface = create_interface()
247
 
248
  # Launch the app
249
  iface.launch()