New_Final_Assignment

Sleeping

App Files Community

naman1102 committed on Jun 11

Commit

85d8e61

1 Parent(s): a14b206

Update tools.py

Browse files

Files changed (1) hide show

tools.py +50 -61

tools.py CHANGED Viewed

@@ -88,67 +88,56 @@ def image_tool(task_id: str) -> str:
     headers = {"Authorization": f"Bearer {hf_token}"}
     print("DEBUG: HF token found, proceeding with API calls")
-    # 4) Call HF's vision-ocr to extract text
-    ocr_text = ""
-    try:
-        print("DEBUG: Calling HF OCR API...")
-        ocr_resp = requests.post(
-            "https://api-inference.huggingface.co/models/microsoft/trocr-base-printed",
-            headers=headers,
-            files={"file": image_bytes},
-            timeout=30
-        )
-        print(f"DEBUG: OCR API response status: {ocr_resp.status_code}")
-        ocr_resp.raise_for_status()
-        ocr_json = ocr_resp.json()
-        print(f"DEBUG: OCR API response: {ocr_json}")
-        # Handle different response formats
-        if isinstance(ocr_json, list) and len(ocr_json) > 0:
-            # If it's a list, take the first result
-            ocr_text = ocr_json[0].get("generated_text", "").strip()
-        elif isinstance(ocr_json, dict):
-            ocr_text = ocr_json.get("generated_text", "").strip()
-        if not ocr_text:
-            ocr_text = "(no visible text detected)"
-        print(f"DEBUG: Extracted OCR text: {ocr_text}")
-    except Exception as e:
-        ocr_text = f"Error during HF OCR: {e}"
-        print(f"DEBUG: OCR failed: {e}")
-    # 5) Call HF's image-captioning to get a brief description
-    caption = ""
-    try:
-        print("DEBUG: Calling HF Image Captioning API...")
-        cap_resp = requests.post(
-            "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base",
-            headers=headers,
-            files={"file": image_bytes},
-            timeout=30
-        )
-        print(f"DEBUG: Captioning API response status: {cap_resp.status_code}")
-        cap_resp.raise_for_status()
-        cap_json = cap_resp.json()
-        print(f"DEBUG: Captioning API response: {cap_json}")
-        # Handle different response formats
-        if isinstance(cap_json, list) and len(cap_json) > 0:
-            caption = cap_json[0].get("generated_text", "").strip()
-        elif isinstance(cap_json, dict):
-            caption = cap_json.get("generated_text", "").strip()
-        if not caption:
-            caption = "(no caption generated)"
-        print(f"DEBUG: Generated caption: {caption}")
-    except Exception as e:
-        caption = f"Error during HF captioning: {e}"
-        print(f"DEBUG: Captioning failed: {e}")
-    # 6) Combine OCR + caption
-    combined = f"OCR text:\n{ocr_text}\n\nImage caption:\n{caption}"
-    print(f"DEBUG: Final result: {combined}")
-    return combined
 @tool
 def excel_tool(task_id: str) -> str:

     headers = {"Authorization": f"Bearer {hf_token}"}
     print("DEBUG: HF token found, proceeding with API calls")
+    # Try different HF models for image analysis
+    models_to_try = [
+        "nlpconnect/vit-gpt2-image-captioning",
+        "Salesforce/blip-image-captioning-large",
+        "microsoft/git-base-coco",
+        "microsoft/git-large-coco"
+    ]
+    result_text = ""
+    success = False
+    for model_name in models_to_try:
+        try:
+            print(f"DEBUG: Trying model: {model_name}")
+            resp = requests.post(
+                f"https://api-inference.huggingface.co/models/{model_name}",
+                headers=headers,
+                files={"file": image_bytes},
+                timeout=30
+            )
+            print(f"DEBUG: {model_name} response status: {resp.status_code}")
+            if resp.status_code == 200:
+                resp_json = resp.json()
+                print(f"DEBUG: {model_name} response: {resp_json}")
+                # Handle different response formats
+                if isinstance(resp_json, list) and len(resp_json) > 0:
+                    result_text = resp_json[0].get("generated_text", "").strip()
+                elif isinstance(resp_json, dict):
+                    result_text = resp_json.get("generated_text", "").strip()
+                if result_text:
+                    print(f"DEBUG: Successfully got result from {model_name}: {result_text}")
+                    success = True
+                    break
+            else:
+                print(f"DEBUG: {model_name} failed with status {resp.status_code}")
+        except Exception as e:
+            print(f"DEBUG: {model_name} failed with error: {e}")
+            continue
+    if not success or not result_text:
+        result_text = "Unable to analyze image - all HuggingFace models failed or returned empty results"
+    # Format the result
+    final_result = f"Image Analysis Result:\n{result_text}"
+    print(f"DEBUG: Final result: {final_result}")
+    return final_result
 @tool
 def excel_tool(task_id: str) -> str: