TTS_API_Image_fallback

Sleeping

App Files Files Community

khurrameycon committed on Apr 6

Commit

a1a0caf

verified ·

1 Parent(s): a318fb7

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -34

app.py CHANGED Viewed

@@ -119,51 +119,37 @@ def llm_chat_response(text, image_base64=None):
     HF_TOKEN = os.getenv("HF_TOKEN")
     client = InferenceClient(api_key=HF_TOKEN)
-    # Create a proper conversational format as required by the API
-    if image_base64:
-        # For image + text, we need to use the conversation format
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": text if text else "Describe what you see in the image"
-                    },
-                    {
-                        "type": "image",
-                        "image": {
-                            "data": image_base64
-                        }
-                    }
-                ]
-            }
-        ]
-    else:
-        # Text only
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": text + " Describe in one line only."
-                    }
-                ]
-            }
-        ]
     try:
         response_from_llama = client.chat.completions.create(
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
             messages=messages,
             max_tokens=500
         )
         return response_from_llama.choices[0].message['content']
     except Exception as e:
         print(f"Error calling LLM API: {e}")
         # Fallback response in case of error
-        return "I couldn't process that image. Please try again with a different image or text query."
 app = FastAPI()
 # Initialize pipeline once at startup

     HF_TOKEN = os.getenv("HF_TOKEN")
     client = InferenceClient(api_key=HF_TOKEN)
+    # For image + text requests, we need to use the conversational format
+    # with proper message structure
+    system_message = "You are a helpful assistant that provides concise responses."
     try:
+        if image_base64:
+            messages = [
+                {"role": "system", "content": system_message},
+                {"role": "user", "content": [
+                    {"type": "text", "text": text if text else "Describe what you see in the image in one line only"},
+                    {"type": "image", "source": {"data": f"data:image/jpeg;base64,{image_base64}"}}
+                ]}
+            ]
+        else:
+            messages = [
+                {"role": "system", "content": system_message},
+                {"role": "user", "content": text + " Describe in one line only."}
+            ]
+        # Call the API
         response_from_llama = client.chat.completions.create(
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
             messages=messages,
             max_tokens=500
         )
         return response_from_llama.choices[0].message['content']
     except Exception as e:
         print(f"Error calling LLM API: {e}")
         # Fallback response in case of error
+        return "I couldn't process that input. Please try again with a different image or text query."
 app = FastAPI()
 # Initialize pipeline once at startup