khurrameycon committed
Commit a318fb7 · verified · 1 Parent(s): cf91db0

Update app.py

Files changed (1)
  1. app.py +43 -36
app.py CHANGED
@@ -119,44 +119,51 @@ def llm_chat_response(text, image_base64=None):
     HF_TOKEN = os.getenv("HF_TOKEN")
     client = InferenceClient(api_key=HF_TOKEN)
 
-    message_content = [
-        {
-            "type": "text",
-            "text": text + str('describe in one line only')
-        }
-    ]
-
-    # If image_base64 is provided, add it to the message content
+    # Create a proper conversational format as required by the API
     if image_base64:
-        # Convert base64 to PIL Image for validation
-        try:
-            image_bytes = base64.b64decode(image_base64)
-            # Validate that it's a proper image
-            Image.open(BytesIO(image_bytes))
-
-            # Add the image to message content
-            message_content.append({
-                "type": "image",
-                "image": {
-                    "data": image_base64
-                }
-            })
-        except Exception as e:
-            print(f"Error processing image: {e}")
-
-    messages = [
-        {
-            "role": "user",
-            "content": message_content
-        }
-    ]
+        # For image + text, we need to use the conversation format
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": text if text else "Describe what you see in the image"
+                    },
+                    {
+                        "type": "image",
+                        "image": {
+                            "data": image_base64
+                        }
+                    }
+                ]
+            }
+        ]
+    else:
+        # Text only
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": text + " Describe in one line only."
+                    }
+                ]
+            }
+        ]
 
-    response_from_llama = client.chat.completions.create(
-        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-        messages=messages,
-        max_tokens=500
-    )
-    return response_from_llama.choices[0].message['content']
+    try:
+        response_from_llama = client.chat.completions.create(
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            messages=messages,
+            max_tokens=500
+        )
+        return response_from_llama.choices[0].message['content']
+    except Exception as e:
+        print(f"Error calling LLM API: {e}")
+        # Fallback response in case of error
+        return "I couldn't process that image. Please try again with a different image or text query."
 
 app = FastAPI()
 # Initialize pipeline once at startup
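
For reference, a minimal sketch of how the updated helper might be exercised. The import path and the sample image filename are assumptions for illustration; only llm_chat_response and its (text, image_base64) signature come from the diff above, and HF_TOKEN must be set in the environment for the InferenceClient call to succeed.

import base64

from app import llm_chat_response  # assumed import path for this Space's app.py

# Text-only call: the helper appends "Describe in one line only." to the prompt.
print(llm_chat_response("What is the Eiffel Tower?"))

# Image + text call: the image travels as a base64-encoded string, which the
# helper wraps in the conversational message format introduced by this commit.
with open("photo.jpg", "rb") as f:  # hypothetical local image
    image_b64 = base64.b64encode(f.read()).decode("utf-8")
print(llm_chat_response("What is shown in this photo?", image_base64=image_b64))

Either branch produces the same messages shape, so the chat.completions.create() call and the try/except fallback are shared between the text-only and multimodal paths.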