Update app/main.py
Browse files- app/main.py +115 -150
app/main.py
CHANGED
|
@@ -273,66 +273,94 @@ async def startup_event():
|
|
| 273 |
print("WARNING: Failed to initialize Vertex AI authentication")
|
| 274 |
|
| 275 |
# Conversion functions
|
| 276 |
-
|
| 277 |
-
SUPPORTED_ROLES = ["user", "model"]
|
| 278 |
-
|
| 279 |
-
def create_gemini_prompt(messages: List[OpenAIMessage]) -> List[Dict[str, Any]]:
|
| 280 |
"""
|
| 281 |
Convert OpenAI messages to Gemini format.
|
| 282 |
-
Returns a
|
| 283 |
"""
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
-
#
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
role = message.role
|
| 293 |
-
|
| 294 |
-
# If role is "system", use "user" as specified
|
| 295 |
-
if role == "system":
|
| 296 |
-
role = "user"
|
| 297 |
-
# If role is "assistant", map to "model"
|
| 298 |
-
elif role == "assistant":
|
| 299 |
-
role = "model"
|
| 300 |
|
| 301 |
-
#
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
else:
|
| 306 |
-
#
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
| 312 |
-
#
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
-
#
|
| 316 |
if isinstance(message.content, str):
|
| 317 |
-
|
| 318 |
-
|
|
|
|
|
|
|
| 319 |
elif isinstance(message.content, list):
|
| 320 |
-
#
|
|
|
|
|
|
|
| 321 |
for part in message.content:
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
elif part.get('type') == 'image_url':
|
| 326 |
-
image_url = part.get('image_url', {}).get('url', '')
|
| 327 |
-
if image_url.startswith('data:'):
|
| 328 |
-
# Extract mime type and base64 data
|
| 329 |
-
mime_match = re.match(r'data:([^;]+);base64,(.+)', image_url)
|
| 330 |
-
if mime_match:
|
| 331 |
-
mime_type, b64_data = mime_match.groups()
|
| 332 |
-
image_bytes = base64.b64decode(b64_data)
|
| 333 |
-
parts.append(types.Part.from_bytes(data=image_bytes, mime_type=mime_type))
|
| 334 |
elif isinstance(part, ContentPartText):
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
elif isinstance(part, ContentPartImage):
|
| 337 |
image_url = part.image_url.url
|
| 338 |
if image_url.startswith('data:'):
|
|
@@ -341,29 +369,14 @@ def create_gemini_prompt(messages: List[OpenAIMessage]) -> List[Dict[str, Any]]:
|
|
| 341 |
if mime_match:
|
| 342 |
mime_type, b64_data = mime_match.groups()
|
| 343 |
image_bytes = base64.b64decode(b64_data)
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
# Fallback for unexpected format
|
| 347 |
-
parts.append({"text": str(message.content)})
|
| 348 |
-
|
| 349 |
-
# Add the message with role and parts to our list
|
| 350 |
-
gemini_messages.append({
|
| 351 |
-
"role": role,
|
| 352 |
-
"parts": parts
|
| 353 |
-
})
|
| 354 |
-
|
| 355 |
-
print(f"Converted to {len(gemini_messages)} Gemini messages")
|
| 356 |
-
return gemini_messages
|
| 357 |
-
|
| 358 |
-
# No need for the separate image handling branch as we now handle all content types in one flow
|
| 359 |
|
| 360 |
-
def create_encrypted_gemini_prompt(messages: List[OpenAIMessage]) ->
|
| 361 |
"""
|
| 362 |
Convert OpenAI messages to Gemini format with special encoding for the encrypt model.
|
| 363 |
This function URL-encodes user messages and adds specific system instructions.
|
| 364 |
"""
|
| 365 |
-
print("Creating encrypted Gemini prompt...")
|
| 366 |
-
|
| 367 |
# Check if any message contains image content
|
| 368 |
has_images = False
|
| 369 |
for message in messages:
|
|
@@ -382,8 +395,8 @@ def create_encrypted_gemini_prompt(messages: List[OpenAIMessage]) -> List[Dict[s
|
|
| 382 |
if has_images:
|
| 383 |
return create_gemini_prompt(messages)
|
| 384 |
|
| 385 |
-
# Define the instructions to be added
|
| 386 |
-
|
| 387 |
STRICT OPERATING PROTOCOL:
|
| 388 |
1. **Critical Output Format:** Your responses MUST ALWAYS be standard **UNENCODED PLAIN TEXT**. Absolutely NO URL encoding or any other form of encoding in your output. This is non-negotiable.
|
| 389 |
2. **Input Processing (Internal Only):** User messages and conversation history provided to you will be URL-encoded for privacy. Decode these inputs internally before processing.
|
|
@@ -409,8 +422,8 @@ Ready for your request."""
|
|
| 409 |
# Create a new list of messages with the pre-messages and encoded content
|
| 410 |
new_messages = []
|
| 411 |
|
| 412 |
-
# Add a system message with instructions at the beginning
|
| 413 |
-
new_messages.append(OpenAIMessage(role="system", content=
|
| 414 |
|
| 415 |
# Add pre-messages
|
| 416 |
new_messages.extend(pre_messages)
|
|
@@ -418,7 +431,19 @@ Ready for your request."""
|
|
| 418 |
# Process all messages in their original order
|
| 419 |
for i, message in enumerate(messages):
|
| 420 |
if message.role == "system":
|
| 421 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
new_messages.append(message)
|
| 423 |
|
| 424 |
elif message.role == "user":
|
|
@@ -429,26 +454,12 @@ Ready for your request."""
|
|
| 429 |
content=urllib.parse.quote(message.content)
|
| 430 |
))
|
| 431 |
elif isinstance(message.content, list):
|
| 432 |
-
#
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
if isinstance(part, dict) and part.get('type') == 'text':
|
| 436 |
-
# URL encode text parts
|
| 437 |
-
encoded_parts.append({
|
| 438 |
-
'type': 'text',
|
| 439 |
-
'text': urllib.parse.quote(part.get('text', ''))
|
| 440 |
-
})
|
| 441 |
-
else:
|
| 442 |
-
# Pass through non-text parts (like images)
|
| 443 |
-
encoded_parts.append(part)
|
| 444 |
-
|
| 445 |
-
new_messages.append(OpenAIMessage(
|
| 446 |
-
role=message.role,
|
| 447 |
-
content=encoded_parts
|
| 448 |
-
))
|
| 449 |
else:
|
| 450 |
-
# For assistant messages
|
| 451 |
-
# Check if this is the last
|
| 452 |
is_last_assistant = True
|
| 453 |
for remaining_msg in messages[i+1:]:
|
| 454 |
if remaining_msg.role != "user":
|
|
@@ -462,30 +473,13 @@ Ready for your request."""
|
|
| 462 |
role=message.role,
|
| 463 |
content=urllib.parse.quote(message.content)
|
| 464 |
))
|
| 465 |
-
elif isinstance(message.content, list):
|
| 466 |
-
# Handle list content similar to user messages
|
| 467 |
-
encoded_parts = []
|
| 468 |
-
for part in message.content:
|
| 469 |
-
if isinstance(part, dict) and part.get('type') == 'text':
|
| 470 |
-
encoded_parts.append({
|
| 471 |
-
'type': 'text',
|
| 472 |
-
'text': urllib.parse.quote(part.get('text', ''))
|
| 473 |
-
})
|
| 474 |
-
else:
|
| 475 |
-
encoded_parts.append(part)
|
| 476 |
-
|
| 477 |
-
new_messages.append(OpenAIMessage(
|
| 478 |
-
role=message.role,
|
| 479 |
-
content=encoded_parts
|
| 480 |
-
))
|
| 481 |
else:
|
| 482 |
-
# For non-string
|
| 483 |
new_messages.append(message)
|
| 484 |
else:
|
| 485 |
-
# For other
|
| 486 |
new_messages.append(message)
|
| 487 |
|
| 488 |
-
print(f"Created encrypted prompt with {len(new_messages)} messages")
|
| 489 |
# Now use the standard function to convert to Gemini format
|
| 490 |
return create_gemini_prompt(new_messages)
|
| 491 |
|
|
@@ -832,14 +826,6 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
| 832 |
prompt = create_encrypted_gemini_prompt(request.messages)
|
| 833 |
else:
|
| 834 |
prompt = create_gemini_prompt(request.messages)
|
| 835 |
-
|
| 836 |
-
# Log the structure of the prompt (without exposing sensitive content)
|
| 837 |
-
print(f"Prompt structure: {len(prompt)} messages")
|
| 838 |
-
for i, msg in enumerate(prompt):
|
| 839 |
-
role = msg.get('role', 'unknown')
|
| 840 |
-
parts_count = len(msg.get('parts', []))
|
| 841 |
-
parts_types = [type(p).__name__ for p in msg.get('parts', [])]
|
| 842 |
-
print(f" Message {i+1}: role={role}, parts={parts_count}, types={parts_types}")
|
| 843 |
|
| 844 |
if request.stream:
|
| 845 |
# Handle streaming response
|
|
@@ -852,22 +838,12 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
| 852 |
# If multiple candidates are requested, we'll generate them sequentially
|
| 853 |
for candidate_index in range(candidate_count):
|
| 854 |
# Generate content with streaming
|
| 855 |
-
# Handle
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
config=generation_config,
|
| 862 |
-
)
|
| 863 |
-
except Exception as e:
|
| 864 |
-
# If the above format doesn't work, try the direct format
|
| 865 |
-
print(f"First streaming attempt failed: {e}. Trying direct format...")
|
| 866 |
-
responses = client.models.generate_content_stream(
|
| 867 |
-
model=gemini_model,
|
| 868 |
-
contents=prompt, # Try direct format
|
| 869 |
-
config=generation_config,
|
| 870 |
-
)
|
| 871 |
|
| 872 |
# Convert and yield each chunk
|
| 873 |
for response in responses:
|
|
@@ -897,23 +873,12 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
| 897 |
# Make sure generation_config has candidate_count set
|
| 898 |
if "candidate_count" not in generation_config:
|
| 899 |
generation_config["candidate_count"] = request.n
|
| 900 |
-
# Handle
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
contents={"contents": prompt}, # Wrap in contents field as per API docs
|
| 907 |
-
config=generation_config,
|
| 908 |
-
)
|
| 909 |
-
except Exception as e:
|
| 910 |
-
# If the above format doesn't work, try the direct format
|
| 911 |
-
print(f"First attempt failed: {e}. Trying direct format...")
|
| 912 |
-
response = client.models.generate_content(
|
| 913 |
-
model=gemini_model,
|
| 914 |
-
contents=prompt, # Try direct format
|
| 915 |
-
config=generation_config,
|
| 916 |
-
)
|
| 917 |
|
| 918 |
|
| 919 |
openai_response = convert_to_openai_format(response, request.model)
|
|
|
|
| 273 |
print("WARNING: Failed to initialize Vertex AI authentication")
|
| 274 |
|
| 275 |
# Conversion functions
|
| 276 |
+
def _role_prefix(role: str) -> str:
    """Map an OpenAI role to the conversational prefix used in the flat prompt."""
    return "Human: " if role == "user" else "AI: "


def _data_url_to_part(image_url: str):
    """Decode a base64 ``data:`` URL into a Gemini image Part.

    Returns ``None`` when the URL is not a data URL or does not match the
    expected ``data:<mime>;base64,<payload>`` shape, so malformed images are
    skipped instead of raising.
    """
    if not image_url.startswith('data:'):
        return None
    # e.g. "data:image/png;base64,iVBORw0..." -> ("image/png", "iVBORw0...")
    mime_match = re.match(r'data:([^;]+);base64,(.+)', image_url)
    if not mime_match:
        return None
    mime_type, b64_data = mime_match.groups()
    image_bytes = base64.b64decode(b64_data)
    return types.Part.from_bytes(data=image_bytes, mime_type=mime_type)


def _message_text(message: OpenAIMessage) -> str:
    """Best-effort extraction of ALL textual content from a message.

    Handles plain-string content and OpenAI-style content-part lists.
    The previous implementation read only ``content[0]['text']`` and
    silently dropped any additional text parts.
    """
    if isinstance(message.content, str):
        return message.content
    if isinstance(message.content, list):
        pieces = []
        for part in message.content:
            if isinstance(part, dict) and 'text' in part:
                pieces.append(part.get('text', ''))
            elif isinstance(part, ContentPartText):
                pieces.append(part.text)
        if pieces:
            return "".join(pieces)
    # Fallback for unexpected formats
    return str(message.content)


def create_gemini_prompt(messages: List[OpenAIMessage]) -> Union[str, List[Any]]:
    """
    Convert OpenAI messages to Gemini format.

    Returns either a single string prompt (text-only conversations) or a
    list of content parts — prefixed text strings interleaved with image
    Parts — when any message carries image content.
    """
    # Detect image content first: it decides the output shape.
    has_images = any(
        (isinstance(part, dict) and part.get('type') == 'image_url')
        or isinstance(part, ContentPartImage)
        for message in messages
        if isinstance(message.content, list)
        for part in message.content
    )

    # Text-only conversations collapse to one role-prefixed string.
    if not has_images:
        prompt = ""
        for message in messages:
            content_text = _message_text(message)
            if message.role == "system":
                prompt += f"System: {content_text}\n\n"
            elif message.role == "user":
                prompt += f"Human: {content_text}\n"
            elif message.role == "assistant":
                prompt += f"AI: {content_text}\n"
        # Cue the model to answer when the conversation ends on a user turn.
        # (Guard against an empty message list, which previously raised
        # IndexError on messages[-1].)
        if messages and messages[-1].role == "user":
            prompt += "AI: "
        return prompt

    # Images present: build a flat list of text strings and image Parts,
    # preserving the original order of messages, and within each message
    # the combined text first, then its images.
    gemini_contents: List[Any] = []
    for message in messages:
        if isinstance(message.content, str):
            gemini_contents.append(f"{_role_prefix(message.role)}{message.content}")
        elif isinstance(message.content, list):
            # Combine all text parts of this message into one entry.
            text_content = ""
            for part in message.content:
                if isinstance(part, dict) and part.get('type') == 'text':
                    text_content += part.get('text', '')
                elif isinstance(part, ContentPartText):
                    text_content += part.text
            if text_content:
                gemini_contents.append(f"{_role_prefix(message.role)}{text_content}")

            # Then append each decodable image part.
            for part in message.content:
                if isinstance(part, dict) and part.get('type') == 'image_url':
                    image_part = _data_url_to_part(part.get('image_url', {}).get('url', ''))
                elif isinstance(part, ContentPartImage):
                    image_part = _data_url_to_part(part.image_url.url)
                else:
                    image_part = None
                if image_part is not None:
                    gemini_contents.append(image_part)
    return gemini_contents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
|
| 375 |
+
def create_encrypted_gemini_prompt(messages: List[OpenAIMessage]) -> Union[str, List[Any]]:
|
| 376 |
"""
|
| 377 |
Convert OpenAI messages to Gemini format with special encoding for the encrypt model.
|
| 378 |
This function URL-encodes user messages and adds specific system instructions.
|
| 379 |
"""
|
|
|
|
|
|
|
| 380 |
# Check if any message contains image content
|
| 381 |
has_images = False
|
| 382 |
for message in messages:
|
|
|
|
| 395 |
if has_images:
|
| 396 |
return create_gemini_prompt(messages)
|
| 397 |
|
| 398 |
+
# Define the Chinese instructions to be added
|
| 399 |
+
chinese_instructions = """// AI Assistant Configuration //
|
| 400 |
STRICT OPERATING PROTOCOL:
|
| 401 |
1. **Critical Output Format:** Your responses MUST ALWAYS be standard **UNENCODED PLAIN TEXT**. Absolutely NO URL encoding or any other form of encoding in your output. This is non-negotiable.
|
| 402 |
2. **Input Processing (Internal Only):** User messages and conversation history provided to you will be URL-encoded for privacy. Decode these inputs internally before processing.
|
|
|
|
| 422 |
# Create a new list of messages with the pre-messages and encoded content
|
| 423 |
new_messages = []
|
| 424 |
|
| 425 |
+
# Add a system message with Chinese instructions at the beginning
|
| 426 |
+
new_messages.append(OpenAIMessage(role="system", content=chinese_instructions))
|
| 427 |
|
| 428 |
# Add pre-messages
|
| 429 |
new_messages.extend(pre_messages)
|
|
|
|
| 431 |
# Process all messages in their original order
|
| 432 |
for i, message in enumerate(messages):
|
| 433 |
if message.role == "system":
|
| 434 |
+
# # URL encode system message content
|
| 435 |
+
# if isinstance(message.content, str):
|
| 436 |
+
# system_content = message.content
|
| 437 |
+
# elif isinstance(message.content, list) and message.content and isinstance(message.content[0], dict) and 'text' in message.content[0]:
|
| 438 |
+
# system_content = message.content[0]['text']
|
| 439 |
+
# else:
|
| 440 |
+
# system_content = str(message.content)
|
| 441 |
+
|
| 442 |
+
# # URL encode the system message content
|
| 443 |
+
# new_messages.append(OpenAIMessage(
|
| 444 |
+
# role="system",
|
| 445 |
+
# content=urllib.parse.quote(system_content)
|
| 446 |
+
# ))
|
| 447 |
new_messages.append(message)
|
| 448 |
|
| 449 |
elif message.role == "user":
|
|
|
|
| 454 |
content=urllib.parse.quote(message.content)
|
| 455 |
))
|
| 456 |
elif isinstance(message.content, list):
|
| 457 |
+
# Handle list content (like with images)
|
| 458 |
+
# For simplicity, we'll just pass it through as is
|
| 459 |
+
new_messages.append(message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
else:
|
| 461 |
+
# For non-user messages (assistant messages)
|
| 462 |
+
# Check if this is the last non-user message in the list
|
| 463 |
is_last_assistant = True
|
| 464 |
for remaining_msg in messages[i+1:]:
|
| 465 |
if remaining_msg.role != "user":
|
|
|
|
| 473 |
role=message.role,
|
| 474 |
content=urllib.parse.quote(message.content)
|
| 475 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
else:
|
| 477 |
+
# For non-string content, keep as is
|
| 478 |
new_messages.append(message)
|
| 479 |
else:
|
| 480 |
+
# For other non-user messages, keep as is
|
| 481 |
new_messages.append(message)
|
| 482 |
|
|
|
|
| 483 |
# Now use the standard function to convert to Gemini format
|
| 484 |
return create_gemini_prompt(new_messages)
|
| 485 |
|
|
|
|
| 826 |
prompt = create_encrypted_gemini_prompt(request.messages)
|
| 827 |
else:
|
| 828 |
prompt = create_gemini_prompt(request.messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
|
| 830 |
if request.stream:
|
| 831 |
# Handle streaming response
|
|
|
|
| 838 |
# If multiple candidates are requested, we'll generate them sequentially
|
| 839 |
for candidate_index in range(candidate_count):
|
| 840 |
# Generate content with streaming
|
| 841 |
+
# Handle both string and list content formats (for images)
|
| 842 |
+
responses = client.models.generate_content_stream(
|
| 843 |
+
model=gemini_model,
|
| 844 |
+
contents=prompt, # This can be either a string or a list of content parts
|
| 845 |
+
config=generation_config,
|
| 846 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 847 |
|
| 848 |
# Convert and yield each chunk
|
| 849 |
for response in responses:
|
|
|
|
| 873 |
# Make sure generation_config has candidate_count set
|
| 874 |
if "candidate_count" not in generation_config:
|
| 875 |
generation_config["candidate_count"] = request.n
|
| 876 |
+
# Handle both string and list content formats (for images)
|
| 877 |
+
response = client.models.generate_content(
|
| 878 |
+
model=gemini_model,
|
| 879 |
+
contents=prompt, # This can be either a string or a list of content parts
|
| 880 |
+
config=generation_config,
|
| 881 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 882 |
|
| 883 |
|
| 884 |
openai_response = convert_to_openai_format(response, request.model)
|