wt002 committed on
Commit
f57a425
·
verified ·
1 Parent(s): 2b9b092

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -44
app.py CHANGED
@@ -272,7 +272,6 @@ class VideoTranscriptionTool(Tool):
272
  import os
273
  import base64
274
  import requests
275
- import google.generativeai as genai
276
  from PIL import Image
277
  from io import BytesIO
278
  from smolagents import (
@@ -291,49 +290,11 @@ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
291
 
292
  # Define image analysis tool
293
  @tool
294
- def analyze_image(image_input: str) -> str:
295
- """
296
- Analyzes images using AI vision. Input can be:
297
- - Image URL (http/https)
298
- - Base64 encoded image
299
- - Local file path
300
- Returns detailed image analysis.
301
- """
302
- try:
303
- # Handle URL input
304
- if image_input.startswith(('http://', 'https://')):
305
- response = requests.get(image_input)
306
- response.raise_for_status()
307
- img = Image.open(BytesIO(response.content))
308
- buffer = BytesIO()
309
- img.save(buffer, format="JPEG")
310
- image_data = base64.b64encode(buffer.getvalue()).decode('utf-8')
311
-
312
- # Handle base64 input
313
- elif image_input.startswith('data:image'):
314
- image_data = image_input.split(',')[1]
315
-
316
- # Handle local file path
317
- elif os.path.exists(image_input):
318
- with open(image_input, "rb") as img_file:
319
- image_data = base64.b64encode(img_file.read()).decode('utf-8')
320
-
321
- else:
322
- return "Invalid image input"
323
-
324
- # Analyze with Gemini
325
- model = genai.GenerativeModel('gemini-pro-vision')
326
- response = model.generate_content([
327
- "Analyze this image thoroughly. Describe all significant elements, text, objects, and context.",
328
- genai.types.Part.from_data(
329
- data=base64.b64decode(image_data),
330
- mime_type="image/jpeg"
331
- )
332
- ])
333
- return response.text
334
-
335
- except Exception as e:
336
- return f"Image analysis error: {str(e)}"
337
 
338
  class BasicAgent:
339
  def __init__(self):
 
272
  import os
273
  import base64
274
  import requests
 
275
  from PIL import Image
276
  from io import BytesIO
277
  from smolagents import (
 
290
 
291
  # Define image analysis tool
292
  @tool
293
def image_analysis(image_url: str) -> str:
    """Describe an image with the hosted llava-hf/llava-1.5-7b-hf model.

    Posts the image URL to the Hugging Face Inference API endpoint for the
    model and returns the text generated for it.

    Args:
        image_url: URL of the image to analyze; sent as the request's
            ``inputs`` field.

    Returns:
        The ``generated_text`` value of the first item in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status code.
        requests.Timeout: If the API does not answer within the timeout.
        RuntimeError: If the response body is not a non-empty list whose
            first item carries a ``generated_text`` field.
    """
    API_URL = "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"
    # Without a timeout a stalled connection would block the agent forever.
    response = requests.post(API_URL, json={"inputs": image_url}, timeout=60)
    # Surface HTTP failures explicitly instead of failing later while indexing
    # into an error payload.
    response.raise_for_status()
    payload = response.json()
    try:
        return payload[0]['generated_text']
    except (KeyError, IndexError, TypeError) as err:
        # The body did not have the expected [{"generated_text": ...}] shape;
        # include it in the message so the caller can see what came back.
        raise RuntimeError(
            f"Unexpected image analysis response: {payload!r}"
        ) from err
297
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
  class BasicAgent:
300
  def __init__(self):