khurrameycon committed
Commit a1a0caf · verified · 1 Parent(s): a318fb7

Update app.py

Files changed (1):
  1. app.py +20 -34
app.py CHANGED
@@ -119,51 +119,37 @@ def llm_chat_response(text, image_base64=None):
     HF_TOKEN = os.getenv("HF_TOKEN")
     client = InferenceClient(api_key=HF_TOKEN)
 
-    # Create a proper conversational format as required by the API
-    if image_base64:
-        # For image + text, we need to use the conversation format
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": text if text else "Describe what you see in the image"
-                    },
-                    {
-                        "type": "image",
-                        "image": {
-                            "data": image_base64
-                        }
-                    }
-                ]
-            }
-        ]
-    else:
-        # Text only
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": text + " Describe in one line only."
-                    }
-                ]
-            }
-        ]
+    # For image + text requests, we need to use the conversational format
+    # with proper message structure
+    system_message = "You are a helpful assistant that provides concise responses."
 
     try:
+        if image_base64:
+            messages = [
+                {"role": "system", "content": system_message},
+                {"role": "user", "content": [
+                    {"type": "text", "text": text if text else "Describe what you see in the image in one line only"},
+                    {"type": "image", "source": {"data": f"data:image/jpeg;base64,{image_base64}"}}
+                ]}
+            ]
+        else:
+            messages = [
+                {"role": "system", "content": system_message},
+                {"role": "user", "content": text + " Describe in one line only."}
+            ]
+
+        # Call the API
         response_from_llama = client.chat.completions.create(
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
             messages=messages,
             max_tokens=500
         )
+
         return response_from_llama.choices[0].message['content']
     except Exception as e:
         print(f"Error calling LLM API: {e}")
         # Fallback response in case of error
-        return "I couldn't process that image. Please try again with a different image or text query."
+        return "I couldn't process that input. Please try again with a different image or text query."
 
 app = FastAPI()
 # Initialize pipeline once at startup
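
For reference, below is a self-contained sketch of llm_chat_response as it stands after this commit. The imports and module-level setup are assumptions inferred from the visible context (os, huggingface_hub.InferenceClient, FastAPI; none of them appear in this hunk). One hedge on the committed payload: Hugging Face's OpenAI-compatible chat API documents inline base64 images as an "image_url" content part carrying a data URI, so the sketch uses that shape instead of the {"type": "image", "source": ...} part added above; if the committed shape works against the deployed endpoint, keep it.

import os

from fastapi import FastAPI
from huggingface_hub import InferenceClient


def llm_chat_response(text, image_base64=None):
    HF_TOKEN = os.getenv("HF_TOKEN")
    client = InferenceClient(api_key=HF_TOKEN)

    system_message = "You are a helpful assistant that provides concise responses."

    try:
        if image_base64:
            # Assumption: inline images are passed as an "image_url" part with
            # a data URI, the shape documented for OpenAI-compatible chat APIs,
            # rather than the {"type": "image", "source": ...} part committed above.
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": [
                    {"type": "text",
                     "text": text if text else "Describe what you see in the image in one line only"},
                    {"type": "image_url",
                     "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
                ]},
            ]
        else:
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": text + " Describe in one line only."},
            ]

        # Call the serverless inference endpoint with the vision-instruct model
        response_from_llama = client.chat.completions.create(
            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
            messages=messages,
            max_tokens=500,
        )
        return response_from_llama.choices[0].message["content"]
    except Exception as e:
        print(f"Error calling LLM API: {e}")
        # Fallback response in case of error
        return "I couldn't process that input. Please try again with a different image or text query."


app = FastAPI()

A quick smoke test under these assumptions, with HF_TOKEN set in the environment:

print(llm_chat_response("What is the capital of France?"))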