Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -231,41 +231,73 @@ def parse_excel_to_json(task_id: str) -> dict:
|
|
231 |
|
232 |
|
233 |
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
inputs = {
|
239 |
-
"
|
240 |
-
|
|
|
|
|
241 |
}
|
242 |
-
output_type = "string"
|
243 |
|
244 |
-
def
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
elif len(url.strip()) == 11: # Direct ID
|
251 |
-
video_id = url.strip()
|
252 |
-
else:
|
253 |
-
return f"YouTube URL or ID: {url} is invalid!"
|
254 |
|
255 |
try:
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
else:
|
265 |
-
return "
|
|
|
|
|
266 |
|
267 |
except Exception as e:
|
268 |
-
return f"
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
|
271 |
|
@@ -277,7 +309,7 @@ from io import BytesIO
|
|
277 |
from smolagents import (
|
278 |
CodeAgent,
|
279 |
ToolCallingAgent,
|
280 |
-
InferenceClientModel,
|
281 |
WebSearchTool,
|
282 |
HfApiModel,
|
283 |
DuckDuckGoSearchTool,
|
@@ -285,15 +317,61 @@ from smolagents import (
|
|
285 |
tool
|
286 |
)
|
287 |
|
288 |
-
# Configure Gemini
|
289 |
-
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
290 |
|
291 |
# Define image analysis tool
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
|
299 |
class BasicAgent:
|
@@ -313,6 +391,8 @@ class BasicAgent:
|
|
313 |
visit_webpage_tool = VisitWebpageTool()
|
314 |
final_answer_tool = FinalAnswerTool()
|
315 |
video_transcription_tool = VideoTranscriptionTool()
|
|
|
|
|
316 |
code_llama_tool = CodeLlamaTool()
|
317 |
|
318 |
system_prompt = f"""
|
|
|
231 |
|
232 |
|
233 |
|
234 |
+
import os
|
235 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
236 |
+
from docx import Document as DocxDocument
|
237 |
+
import openpyxl
|
238 |
+
|
239 |
+
class AnalyseAttachmentTool:
    """
    A tool for analyzing various attachment types (PY, PDF, TXT, DOCX, XLSX)
    and extracting their text content.

    Instances are callable: ``tool(file_path)`` delegates to ``_run``.
    Problems (missing file, unsupported extension, parser failure) are
    reported as a descriptive string in the return value, never raised.
    """

    name = "analyze_attachment"
    description = (
        "Analyzes attachments including PY, PDF, TXT, DOCX, and XLSX files and returns text content. "
        "Useful for understanding the content of various document types. "
        "The output is limited to the first 3000 characters for readability."
    )
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Local path to the attachment file (e.g., 'documents/report.pdf').",
        }
    }

    # Upper bound on the number of characters returned; keep in sync with
    # the limit advertised in ``description``.
    MAX_CHARS = 3000

    def _run(self, file_path: str) -> str:
        """
        Executes the attachment analysis. This method is called internally by the tool.

        Args:
            file_path: Local path of the file to read. The format is chosen
                from the (case-insensitive) file extension.

        Returns:
            At most ``MAX_CHARS`` characters of extracted text, or a message
            describing why the file could not be processed.
        """
        if not os.path.exists(file_path):
            return f"File not found: {file_path}"

        try:
            ext = os.path.splitext(file_path)[1].lower()

            if ext == ".pdf":
                content = self._read_pdf(file_path)
            elif ext in (".txt", ".py"):
                content = self._read_text(file_path)
            elif ext == ".docx":
                content = self._read_docx(file_path)
            elif ext == ".xlsx":
                content = self._read_xlsx(file_path)
            else:
                return "Unsupported file format. Please use PY, PDF, TXT, DOCX, or XLSX."

            return content[: self.MAX_CHARS]

        except Exception as e:
            # Tool boundary: the caller expects a string, so parser errors
            # are converted to a message instead of propagating.
            return f"An error occurred while processing the file: {str(e)}"

    def _read_pdf(self, file_path: str) -> str:
        """Return the text of every PDF page, pages separated by a blank line."""
        documents = PyMuPDFLoader(file_path).load()
        return "\n\n".join(doc.page_content for doc in documents)

    def _read_text(self, file_path: str) -> str:
        """Return the leading ``MAX_CHARS`` characters of a plain-text file."""
        # errors="replace" keeps files that are not valid UTF-8 readable
        # instead of failing the whole call with a decode error.
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            # Only the head of the file is ever returned, so don't load more.
            return file.read(self.MAX_CHARS)

    def _read_docx(self, file_path: str) -> str:
        """Return the paragraph texts of a DOCX file, one per line."""
        doc = DocxDocument(file_path)
        return "\n".join(para.text for para in doc.paragraphs)

    def _read_xlsx(self, file_path: str) -> str:
        """Return tab-separated rows of each sheet, stopping once ``MAX_CHARS`` is reached."""
        # read_only streams rows lazily; data_only yields cached values
        # rather than formula strings.
        wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
        try:
            parts = []
            size = 0
            for line in self._iter_xlsx_lines(wb):
                parts.append(line)
                size += len(line)
                if size >= self.MAX_CHARS:
                    # Everything past the limit would be truncated anyway.
                    break
            return "".join(parts)
        finally:
            # Read-only workbooks keep the underlying file open until closed.
            wb.close()

    @staticmethod
    def _iter_xlsx_lines(wb):
        """Yield a 'Sheet: <title>' header and then one text line per row, for each sheet."""
        for sheet in wb:
            yield f"Sheet: {sheet.title}\n"
            for row in sheet.iter_rows(values_only=True):
                yield "\t".join("" if cell is None else str(cell) for cell in row) + "\n"

    def __call__(self, file_path: str) -> str:
        """
        Makes the instance callable directly, invoking the _run method.
        """
        return self._run(file_path)
|
301 |
|
302 |
|
303 |
|
|
|
309 |
from smolagents import (
|
310 |
CodeAgent,
|
311 |
ToolCallingAgent,
|
312 |
+
#InferenceClientModel,
|
313 |
WebSearchTool,
|
314 |
HfApiModel,
|
315 |
DuckDuckGoSearchTool,
|
|
|
317 |
tool
|
318 |
)
|
319 |
|
|
|
|
|
320 |
|
321 |
# Define image analysis tool
|
322 |
+
import requests
|
323 |
+
|
324 |
+
class ImageAnalysisTool:
    """
    A tool for analyzing images using a hosted Hugging Face model.

    Instances are callable: ``tool(image_url)`` delegates to ``_run``.
    Network and response-format problems are reported as a descriptive
    string in the return value, never raised.
    """

    name = "image_analysis"
    description = (
        "Analyzes an image provided via a URL and returns a textual description of its content. "
        "This tool is useful for understanding the visual content of an image."
    )
    inputs = {
        "image_url": {
            "type": "string",
            "description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
        }
    }

    # Endpoint the image URL is posted to. Kept as a class attribute so a
    # subclass or instance can point at a different model.
    API_URL = "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"

    # Seconds to wait for the API; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 30

    @staticmethod
    def _extract_description(payload, raw_text: str) -> str:
        """
        Pull the description out of a decoded API response.

        Args:
            payload: The JSON-decoded response body.
            raw_text: The undecoded response body, echoed back in the
                message when the payload has an unexpected shape.

        Returns:
            ``payload[0]['generated_text']`` when the payload is a non-empty
            list whose first element is a dict with that key; otherwise an
            "Unexpected API response format" message.
        """
        if isinstance(payload, list) and payload:
            first = payload[0]
            # Require a dict: `in` on a str/list would be a substring or
            # membership test and the later key lookup would fail.
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"]
        return f"Unexpected API response format: {raw_text}"

    def _run(self, image_url: str) -> str:
        """
        Executes the image analysis by sending the image URL to the Hugging Face API.

        Args:
            image_url: URL of the image, sent as the ``inputs`` field of the
                JSON request body.

        Returns:
            The generated description, or a message describing the request
            or response-format failure.
        """
        try:
            response = requests.post(
                self.API_URL,
                json={"inputs": image_url},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

            # Decode the body once and validate its shape in one place.
            return self._extract_description(response.json(), response.text)

        except requests.exceptions.RequestException as e:
            return f"An error occurred during the API request: {e}"
        except Exception as e:
            # Tool boundary: the caller expects a string, not an exception.
            return f"An unexpected error occurred: {e}"

    def __call__(self, image_url: str) -> str:
        """
        Makes the instance callable directly, invoking the _run method for convenience.
        """
        return self._run(image_url)
|
374 |
+
|
375 |
|
376 |
|
377 |
class BasicAgent:
|
|
|
391 |
visit_webpage_tool = VisitWebpageTool()
|
392 |
final_answer_tool = FinalAnswerTool()
|
393 |
video_transcription_tool = VideoTranscriptionTool()
|
394 |
+
Image_Analysis_Tool = ImageAnalysisTool()
|
395 |
+
Analyse_Attachment_Tool = AnalyseAttachmentTool()
|
396 |
code_llama_tool = CodeLlamaTool()
|
397 |
|
398 |
system_prompt = f"""
|