Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -53,9 +53,9 @@ class LazyPipeline:
|
|
53 |
"text-generation",
|
54 |
model=model,
|
55 |
tokenizer=tokenizer,
|
56 |
-
max_length =
|
57 |
max_new_tokens=512,
|
58 |
-
temperature=0.
|
59 |
)
|
60 |
return self.pipeline
|
61 |
|
@@ -113,7 +113,7 @@ class LazyChains:
|
|
113 |
|
114 |
def create_prompt(self, task):
|
115 |
return PromptTemplate(
|
116 |
-
template=task + "\n\nContext: {context}\n\nTask: {question}\n\n
|
117 |
input_variables=["context", "question"]
|
118 |
)
|
119 |
|
@@ -147,32 +147,38 @@ lazy_chains = LazyChains(lazy_llm)
|
|
147 |
|
148 |
def count_words_and_tokens(text):
|
149 |
words = len(text.split())
|
150 |
-
tokens = len(AutoTokenizer.from_pretrained("
|
151 |
return words, tokens
|
152 |
|
153 |
@spaces.GPU(duration=150)
|
154 |
def process_input(input_file):
|
155 |
start_time = time.time()
|
156 |
|
|
|
|
|
|
|
157 |
file_extension = os.path.splitext(input_file.name)[1].lower()
|
158 |
|
159 |
if file_extension == '.txt':
|
160 |
with open(input_file.name, 'r', encoding='utf-8') as file:
|
161 |
content = file.read()
|
162 |
words, tokens = count_words_and_tokens(content)
|
163 |
-
input_info = f"Text file
|
164 |
video_path = None
|
165 |
elif file_extension == '.pdf':
|
166 |
loader = PyPDFLoader(input_file.name)
|
167 |
pages = loader.load_and_split()
|
168 |
content = '\n'.join([page.page_content for page in pages])
|
169 |
words, tokens = count_words_and_tokens(content)
|
170 |
-
input_info = f"PDF file
|
171 |
video_path = None
|
172 |
elif file_extension in ['.mp4', '.avi', '.mov']:
|
173 |
temp_video_path = "temp_video" + file_extension
|
174 |
shutil.copy2(input_file.name, temp_video_path)
|
175 |
|
|
|
|
|
|
|
176 |
language = "en" # Default to English for video files
|
177 |
diarization.process_video(temp_video_path, hf_token, language)
|
178 |
|
@@ -183,43 +189,61 @@ def process_input(input_file):
|
|
183 |
input_info = f"Video transcribed. Words: {words}, Tokens: {tokens}"
|
184 |
video_path = temp_video_path
|
185 |
else:
|
186 |
-
return "Unsupported file format. Please upload a TXT, PDF, or video file.", None, None, None, None, None
|
187 |
|
188 |
detected_language = detect_language(content)
|
189 |
|
|
|
|
|
|
|
190 |
attachments_chain, bigfive_chain, personalities_chain = lazy_chains.get_chains()
|
191 |
|
192 |
attachments_result = attachments_chain({"query": content})
|
193 |
-
attachments_answer = attachments_result['result'].split("
|
194 |
|
195 |
bigfive_result = bigfive_chain({"query": content})
|
196 |
-
bigfive_answer = bigfive_result['result'].split("
|
197 |
|
198 |
personalities_result = personalities_chain({"query": content})
|
199 |
-
personalities_answer = personalities_result['result'].split("
|
200 |
|
201 |
end_time = time.time()
|
202 |
execution_time = end_time - start_time
|
203 |
|
204 |
-
execution_info = f"
|
205 |
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
gr.
|
213 |
-
gr.
|
214 |
-
|
215 |
-
gr.
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
# Launch the app
|
225 |
iface.launch()
|
|
|
53 |
"text-generation",
|
54 |
model=model,
|
55 |
tokenizer=tokenizer,
|
56 |
+
max_length = 2000,
|
57 |
max_new_tokens=512,
|
58 |
+
temperature=0.8,
|
59 |
)
|
60 |
return self.pipeline
|
61 |
|
|
|
113 |
|
114 |
def create_prompt(self, task):
    """Build a PromptTemplate that prefixes *task* onto the shared scaffold.

    The template exposes two variables, ``context`` and ``question``, and
    ends with an ``Answer:`` marker.  NOTE(review): that trailing
    ``-----------\\n\\nAnswer:`` delimiter appears significant — downstream
    code splits the model output on it, so it must not be altered.
    """
    scaffold = "\n\nContext: {context}\n\nTask: {question}\n\n-----------\n\nAnswer: "
    return PromptTemplate(
        template=task + scaffold,
        input_variables=["context", "question"],
    )
|
119 |
|
|
|
147 |
|
148 |
# Lazily-created tokenizer shared by every count_words_and_tokens call.
# The original code called AutoTokenizer.from_pretrained(...) on EVERY
# invocation; that hits disk (and possibly the network) each time just to
# count tokens, which is a serious per-call cost. Load once and reuse.
_COUNT_TOKENIZER = None


def count_words_and_tokens(text):
    """Return ``(word_count, token_count)`` for *text*.

    Words are whitespace-delimited (``str.split``).  Tokens are counted with
    the Mistral-7B-Instruct-v0.3 tokenizer so the figure matches how the LLM
    itself will see the input.

    Args:
        text: the raw document text to measure.

    Returns:
        A ``(words, tokens)`` tuple of ints.
    """
    global _COUNT_TOKENIZER
    if _COUNT_TOKENIZER is None:
        _COUNT_TOKENIZER = AutoTokenizer.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3"
        )
    words = len(text.split())
    tokens = len(_COUNT_TOKENIZER.tokenize(text))
    return words, tokens
|
152 |
|
153 |
@spaces.GPU(duration=150)
|
154 |
def process_input(input_file):
|
155 |
start_time = time.time()
|
156 |
|
157 |
+
progress_info = "Processing file..."
|
158 |
+
yield progress_info, None, None, None, None, None, None, None, None
|
159 |
+
|
160 |
file_extension = os.path.splitext(input_file.name)[1].lower()
|
161 |
|
162 |
if file_extension == '.txt':
|
163 |
with open(input_file.name, 'r', encoding='utf-8') as file:
|
164 |
content = file.read()
|
165 |
words, tokens = count_words_and_tokens(content)
|
166 |
+
input_info = f"Text file processed. Words: {words}, Tokens: {tokens}"
|
167 |
video_path = None
|
168 |
elif file_extension == '.pdf':
|
169 |
loader = PyPDFLoader(input_file.name)
|
170 |
pages = loader.load_and_split()
|
171 |
content = '\n'.join([page.page_content for page in pages])
|
172 |
words, tokens = count_words_and_tokens(content)
|
173 |
+
input_info = f"PDF file processed. Words: {words}, Tokens: {tokens}"
|
174 |
video_path = None
|
175 |
elif file_extension in ['.mp4', '.avi', '.mov']:
|
176 |
temp_video_path = "temp_video" + file_extension
|
177 |
shutil.copy2(input_file.name, temp_video_path)
|
178 |
|
179 |
+
progress_info = "Transcribing video..."
|
180 |
+
yield progress_info, None, None, None, None, None, None, None, temp_video_path
|
181 |
+
|
182 |
language = "en" # Default to English for video files
|
183 |
diarization.process_video(temp_video_path, hf_token, language)
|
184 |
|
|
|
189 |
input_info = f"Video transcribed. Words: {words}, Tokens: {tokens}"
|
190 |
video_path = temp_video_path
|
191 |
else:
|
192 |
+
return "Unsupported file format. Please upload a TXT, PDF, or video file.", None, None, None, None, None, None, None, None
|
193 |
|
194 |
detected_language = detect_language(content)
|
195 |
|
196 |
+
progress_info = "Analyzing content..."
|
197 |
+
yield progress_info, None, detected_language, input_info, None, None, None, None, video_path
|
198 |
+
|
199 |
attachments_chain, bigfive_chain, personalities_chain = lazy_chains.get_chains()
|
200 |
|
201 |
attachments_result = attachments_chain({"query": content})
|
202 |
+
attachments_answer = attachments_result['result'].split("-----------\n\nAnswer:")[-1].strip()
|
203 |
|
204 |
bigfive_result = bigfive_chain({"query": content})
|
205 |
+
bigfive_answer = bigfive_result['result'].split("-----------\n\nAnswer:")[-1].strip()
|
206 |
|
207 |
personalities_result = personalities_chain({"query": content})
|
208 |
+
personalities_answer = personalities_result['result'].split("-----------\n\nAnswer:")[-1].strip()
|
209 |
|
210 |
end_time = time.time()
|
211 |
execution_time = end_time - start_time
|
212 |
|
213 |
+
execution_info = f"{execution_time:.2f} seconds"
|
214 |
|
215 |
+
progress_info = "Analysis complete!"
|
216 |
+
|
217 |
+
yield progress_info, execution_info, detected_language, input_info, attachments_answer, bigfive_answer, personalities_answer, video_path
|
218 |
+
|
219 |
+
def create_interface():
    """Assemble the Gradio Blocks UI for the personality-analysis app.

    Returns the (un-launched) ``gr.Blocks`` interface.  Uploading a file
    streams progress and results from ``process_input`` into the output
    widgets.
    """
    with gr.Blocks() as iface:
        gr.Markdown("# Personality Analysis Classification")
        gr.Markdown("Upload a Video, TXT, or PDF file.")

        with gr.Row():
            input_file = gr.File(label="Upload File (TXT, PDF, or Video)")

            with gr.Column():
                progress = gr.Textbox(label="Progress")
                # BUG FIX: these components were created with visible=False
                # and nothing ever made them visible — process_input yields
                # plain values, which update a component's *value* but not
                # its *visibility* — so the results could never be seen.
                # Create them visible instead.
                execution_time = gr.Textbox(label="Execution Time")
                detected_language = gr.Textbox(label="Detected Language")
                input_info = gr.Textbox(label="Input Information")
                video_output = gr.Video(label="Input Video")
                attachments_output = gr.Textbox(label="Attachments Results")
                bigfive_output = gr.Textbox(label="Big Five Results")
                personalities_output = gr.Textbox(label="Personalities Results")

        # process_input is a generator, so each yield updates these outputs
        # in order.  NOTE(review): its yields must carry exactly as many
        # values as there are outputs here (8) — verify against process_input.
        input_file.upload(
            fn=process_input,
            inputs=[input_file],
            outputs=[
                progress,
                execution_time,
                detected_language,
                input_info,
                attachments_output,
                bigfive_output,
                personalities_output,
                video_output,
            ],
            show_progress=True,
        )

    return iface


iface = create_interface()

# Launch the app
iface.launch()
|