Commit 5a054cc · fixed token refresh bug
Parent: 6a843ee

gemini_proxy.py CHANGED (+335 -186)
@@ -357,19 +357,29 @@ def get_user_project_id(creds):
         raise
 
 def save_credentials(creds, project_id=None):
     creds_data = {
         "client_id": CLIENT_ID,
         "client_secret": CLIENT_SECRET,
-        "
         "refresh_token": creds.refresh_token,
-        "
-        "token_type": "Bearer",
         "token_uri": "https://oauth2.googleapis.com/token",
     }
 
-    # Add expiry if available
     if creds.expiry:
-
 
     # If project_id is provided, save it; otherwise preserve existing project_id
     if project_id:
@@ -383,13 +393,25 @@ def save_credentials(creds, project_id=None):
         except Exception:
             pass  # If we can't read existing file, just continue without project_id
 
     with open(CREDENTIAL_FILE, "w") as f:
-        json.dump(creds_data, f)
 
 def get_credentials():
     """Loads credentials matching gemini-cli OAuth2 flow."""
     global credentials
 
     # Check environment for credentials first
     env_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
     if env_creds and os.path.exists(env_creds):
@@ -398,10 +420,7 @@ def get_credentials():
                 creds_data = json.load(f)
             credentials = Credentials.from_authorized_user_info(creds_data, SCOPES)
             print("Loaded credentials from GOOGLE_APPLICATION_CREDENTIALS.")
-            if credentials.expired
-                print("Refreshing expired credentials...")
-                credentials.refresh(GoogleAuthRequest())
-                save_credentials(credentials)
             return credentials
         except Exception as e:
             print(f"Could not load credentials from GOOGLE_APPLICATION_CREDENTIALS: {e}")
@@ -412,53 +431,49 @@ def get_credentials():
             with open(CREDENTIAL_FILE, "r") as f:
                 creds_data = json.load(f)
 
             credentials = Credentials.from_authorized_user_info(creds_data, SCOPES)
             print("Loaded credentials from cache.")
 
-            #
-            if
-
-
-                from google.auth.transport.requests import Request as AuthRequest
-                auth_request = AuthRequest()
-                credentials.refresh(auth_request)
-                print("Credentials refreshed successfully!")
-
-                # Save refreshed credentials
-                updated_creds_data = {
-                    'client_id': credentials.client_id,
-                    'client_secret': credentials.client_secret,
-                    'access_token': credentials.token,
-                    'refresh_token': credentials.refresh_token,
-                    'scope': credentials.scopes,
-                    'token_type': 'Bearer',
-                    'token_uri': credentials.token_uri,
-                    'expiry': credentials.expiry.isoformat() if credentials.expiry else None,
-                    'project_id': creds_data.get('project_id')
-                }
-
-                with open(CREDENTIAL_FILE, 'w') as f:
-                    json.dump(updated_creds_data, f, indent=2)
-                print("Refreshed credentials saved.")
-
-            except Exception as e:
-                print(f"Failed to refresh credentials: {e}")
-                return None
-
-            # Check if we have a valid token after potential refresh
-            if not credentials.token:
-                print("No access token available after refresh attempt. Starting new login.")
-                return None
 
-
-
-
-
-
-
-
-
-
             return credentials
         except Exception as e:
             print(f"Could not load cached credentials: {e}. Starting new login.")
@@ -525,6 +540,149 @@ def get_credentials():
     oauthlib.oauth2.rfc6749.parameters.validate_token_parameters = original_validate
 
 
 @app.options("/{full_path:path}")
 async def handle_preflight(request: Request, full_path: str):
     """Handle CORS preflight requests without authentication."""
@@ -546,8 +704,10 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
     if not creds:
         print("❌ No credentials available")
         return Response(content="Authentication failed. Please restart the proxy to log in.", status_code=500)
 
-    # Check if credentials need refreshing (
     if creds.expired and creds.refresh_token:
         print("Credentials expired. Refreshing...")
         try:
@@ -589,6 +749,10 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
     # Remove our authentication parameters
     query_params.pop("key", None)
 
     # Add remaining query parameters to target URL if any
     if query_params:
         from urllib.parse import urlencode
@@ -631,7 +795,7 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
     except (json.JSONDecodeError, AttributeError):
         final_post_data = post_data
 
-
         "Authorization": f"Bearer {creds.token}",
         "Content-Type": "application/json",
         "User-Agent": get_user_agent(),
@@ -642,157 +806,142 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
         try:
             print(f"[STREAM] Starting streaming request to: {target_url}")
             print(f"[STREAM] Request payload size: {len(final_post_data)} bytes")
 
             # Make the initial streaming request
-            resp = requests.post(target_url, data=final_post_data, headers=
             print(f"[STREAM] Response status: {resp.status_code}")
             print(f"[STREAM] Response headers: {dict(resp.headers)}")
 
-            # If we get a 401, try refreshing the token
-            if resp.status_code == 401
-                print("[STREAM] Received 401 from Google API. Attempting
-
-
-
-
-
-
-
-
-
-
-
-
-
                         return
 
             with resp:
                 resp.raise_for_status()
 
-
-
-                in_array = False
-                chunk_count = 0
-                total_bytes = 0
-                objects_yielded = 0
 
-
-
-                for chunk in resp.
-                    if
-
-
-
-
-
-
-                    # Process complete JSON objects from the buffer
-                    processing_iterations = 0
-                    while buffer:
-                        processing_iterations += 1
-                        if processing_iterations > 100:  # Prevent infinite loops
-                            break
-
-                        buffer = buffer.lstrip()
-
-                        if not buffer:
-                            break
-
-                        # Handle array start
-                        if buffer.startswith('[') and not in_array:
-                            buffer = buffer[1:].lstrip()
-                            in_array = True
-                            continue
-
-                        # Handle array end
-                        if buffer.startswith(']'):
-                            break
 
-
-
-
-                            continue
-
-                        # Look for complete JSON objects
-                        if buffer.startswith('{'):
-                            brace_count = 0
-                            in_string = False
-                            escape_next = False
-                            end_pos = -1
-
-                            for i, char in enumerate(buffer):
-                                if escape_next:
-                                    escape_next = False
-                                    continue
-                                if char == '\\':
-                                    escape_next = True
-                                    continue
-                                if char == '"' and not escape_next:
-                                    in_string = not in_string
-                                    continue
-                                if not in_string:
-                                    if char == '{':
-                                        brace_count += 1
-                                    elif char == '}':
-                                        brace_count -= 1
-                                        if brace_count == 0:
-                                            end_pos = i + 1
-                                            break
-
-                            if end_pos > 0:
-                                # Found complete JSON object
-                                json_str = buffer[:end_pos]
-                                buffer = buffer[end_pos:].lstrip()
-
 
-
-
-
-
-
-
-
-
-                                continue
-                            else:
-                                # Incomplete object, wait for more data
-                                break
-                        else:
-                            # Skip unexpected characters
-                            buffer = buffer[1:]
 
         except requests.exceptions.RequestException as e:
             print(f"Error during streaming request: {e}")
-
-            yield f
         except Exception as e:
             print(f"An unexpected error occurred during streaming: {e}")
-
-            yield f
-
-
     else:
         # Make the request
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
         if resp.status_code == 200:
             try:
@@ -800,7 +949,7 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
                 # The actual response is nested under the "response" key
                 standard_gemini_response = google_api_response.get("response")
                 # Return the response object directly, not wrapped in a list
-                return Response(content=json.dumps(standard_gemini_response), status_code=200, media_type="application/json")
             except (json.JSONDecodeError, AttributeError) as e:
                 print(f"Error converting to standard Gemini format: {e}")
                 # Fallback to sending the original content if conversion fails
@@ -357,19 +357,29 @@ def get_user_project_id(creds):
         raise
 
 def save_credentials(creds, project_id=None):
+    print(f"DEBUG: Saving credentials - Token: {creds.token[:20] if creds.token else 'None'}..., Expired: {creds.expired}, Expiry: {creds.expiry}")
+
     creds_data = {
         "client_id": CLIENT_ID,
         "client_secret": CLIENT_SECRET,
+        "token": creds.token,  # Use 'token' instead of 'access_token' for consistency with Google Auth Library
         "refresh_token": creds.refresh_token,
+        "scopes": creds.scopes if creds.scopes else SCOPES,  # Use 'scopes' as list instead of 'scope' as string
         "token_uri": "https://oauth2.googleapis.com/token",
     }
 
+    # Add expiry if available - ensure it's timezone-aware
     if creds.expiry:
+        # Ensure the expiry is timezone-aware (UTC)
+        if creds.expiry.tzinfo is None:
+            from datetime import timezone
+            expiry_utc = creds.expiry.replace(tzinfo=timezone.utc)
+        else:
+            expiry_utc = creds.expiry
+        creds_data["expiry"] = expiry_utc.isoformat()
+        print(f"DEBUG: Saving expiry as: {creds_data['expiry']}")
+    else:
+        print("DEBUG: No expiry time available to save")
 
     # If project_id is provided, save it; otherwise preserve existing project_id
     if project_id:
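The timezone normalization above exists because google-auth keeps `Credentials.expiry` as a naive UTC datetime, and comparing a naive datetime against an aware one raises `TypeError`. A minimal standalone sketch of the same normalization (illustrative helper names, not part of the commit):

```python
from datetime import datetime, timezone

def to_aware_utc(dt: datetime) -> datetime:
    """Interpret a naive datetime as UTC; leave aware datetimes untouched."""
    return dt.replace(tzinfo=timezone.utc) if dt.tzinfo is None else dt

def seconds_until(expiry: datetime) -> float:
    """Seconds remaining before expiry; negative once the token has expired."""
    return (to_aware_utc(expiry) - datetime.now(timezone.utc)).total_seconds()

# Example: a naive expiry (as google-auth stores it) can now be compared safely.
print(seconds_until(datetime.utcnow()))  # roughly 0.0, and no TypeError
```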
@@ -383,13 +393,25 @@ def save_credentials(creds, project_id=None):
         except Exception:
             pass  # If we can't read existing file, just continue without project_id
 
+    print(f"DEBUG: Final credential data to save: {json.dumps(creds_data, indent=2)}")
+
     with open(CREDENTIAL_FILE, "w") as f:
+        json.dump(creds_data, f, indent=2)
+
+    print("DEBUG: Credentials saved to file")
 
 def get_credentials():
     """Loads credentials matching gemini-cli OAuth2 flow."""
     global credentials
 
+    # First, check if we already have valid credentials in memory
+    if credentials and credentials.token:
+        print("Using valid credentials from memory cache.")
+        print(f"DEBUG: Memory credentials - Token: {credentials.token[:20] if credentials.token else 'None'}..., Expired: {credentials.expired}, Expiry: {credentials.expiry}")
+        return credentials
+    else:
+        print("No valid credentials in memory. Loading from disk.")
+
     # Check environment for credentials first
     env_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
     if env_creds and os.path.exists(env_creds):
creds_data = json.load(f)
|
421 |
credentials = Credentials.from_authorized_user_info(creds_data, SCOPES)
|
422 |
print("Loaded credentials from GOOGLE_APPLICATION_CREDENTIALS.")
|
423 |
+
print(f"DEBUG: Env credentials - Token: {credentials.token[:20] if credentials.token else 'None'}..., Expired: {credentials.expired}, Expiry: {credentials.expiry}")
|
|
|
|
|
|
|
424 |
return credentials
|
425 |
except Exception as e:
|
426 |
print(f"Could not load credentials from GOOGLE_APPLICATION_CREDENTIALS: {e}")
|
|
|
@@ -412,53 +431,49 @@ def get_credentials():
             with open(CREDENTIAL_FILE, "r") as f:
                 creds_data = json.load(f)
 
+            print(f"DEBUG: Raw credential data from file: {json.dumps(creds_data, indent=2)}")
+
+            # Handle both old format (access_token) and new format (token)
+            if "access_token" in creds_data and "token" not in creds_data:
+                creds_data["token"] = creds_data["access_token"]
+                print("DEBUG: Converted access_token to token field")
+
+            # Handle both old format (scope as string) and new format (scopes as list)
+            if "scope" in creds_data and "scopes" not in creds_data:
+                creds_data["scopes"] = creds_data["scope"].split()
+                print("DEBUG: Converted scope string to scopes list")
+
             credentials = Credentials.from_authorized_user_info(creds_data, SCOPES)
             print("Loaded credentials from cache.")
+            print(f"DEBUG: Loaded credentials - Token: {credentials.token[:20] if credentials.token else 'None'}..., Expired: {credentials.expired}, Expiry: {credentials.expiry}")
 
+            # Manual expiry check to avoid timezone issues
+            if credentials.expiry:
+                from datetime import datetime, timezone
+                now = datetime.now(timezone.utc)
 
+                # Handle timezone-naive expiry by assuming it's UTC
+                if credentials.expiry.tzinfo is None:
+                    expiry_utc = credentials.expiry.replace(tzinfo=timezone.utc)
+                else:
+                    expiry_utc = credentials.expiry
+
+                time_until_expiry = expiry_utc - now
+                print(f"DEBUG: Current time: {now}")
+                print(f"DEBUG: Token expires at: {expiry_utc}")
+                print(f"DEBUG: Time until expiry: {time_until_expiry}")
+
+                # Override the expired property if the token is actually still valid
+                is_actually_expired = time_until_expiry.total_seconds() <= 0
+                print(f"DEBUG: Token is actually expired: {is_actually_expired}")
+                print(f"DEBUG: Google Auth Library says expired: {credentials.expired}")
+
+                if not is_actually_expired and credentials.token:
+                    print("DEBUG: Token is valid, overriding expired status")
+                    # Monkey patch the expired property to return False
+                    credentials._expiry = expiry_utc
+                    return credentials
+
             return credentials
         except Exception as e:
             print(f"Could not load cached credentials: {e}. Starting new login.")
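The two compatibility branches above amount to a one-way schema migration from the proxy's old on-disk format (`access_token`, space-separated `scope`) to the keys `Credentials.from_authorized_user_info` expects (`token`, `scopes` as a list). A standalone sketch of the same transform (hypothetical function name, made-up values):

```python
def upgrade_credential_schema(data: dict) -> dict:
    """Rewrite old-format credential keys in place and return the dict."""
    if "access_token" in data and "token" not in data:
        data["token"] = data["access_token"]
    if "scope" in data and "scopes" not in data:
        data["scopes"] = data["scope"].split()
    return data

# Example: the old layout becomes loadable by the Google Auth Library again.
old_file = {"access_token": "ya29.example", "scope": "openid email", "refresh_token": "1//abc"}
print(upgrade_credential_schema(old_file)["scopes"])  # ['openid', 'email']
```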
@@ -525,6 +540,149 @@ def get_credentials():
     oauthlib.oauth2.rfc6749.parameters.validate_token_parameters = original_validate
 
 
+@app.get("/v1/models")
+async def list_models(request: Request, username: str = Depends(authenticate_user)):
+    """List available models - matching gemini-cli supported models exactly."""
+    print(f"[GET] /v1/models - User: {username}")
+
+    # Return all models supported by gemini-cli based on tokenLimits.ts
+    models_response = {
+        "models": [
+            {
+                "name": "models/gemini-1.5-pro",
+                "version": "001",
+                "displayName": "Gemini 1.5 Pro",
+                "description": "Mid-size multimodal model that supports up to 2 million tokens",
+                "inputTokenLimit": 2097152,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-1.5-flash",
+                "version": "001",
+                "displayName": "Gemini 1.5 Flash",
+                "description": "Fast and versatile multimodal model for scaling across diverse tasks",
+                "inputTokenLimit": 1048576,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-2.5-pro-preview-05-06",
+                "version": "001",
+                "displayName": "Gemini 2.5 Pro Preview 05-06",
+                "description": "Preview version of Gemini 2.5 Pro from May 6th",
+                "inputTokenLimit": 1048576,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-2.5-pro-preview-06-05",
+                "version": "001",
+                "displayName": "Gemini 2.5 Pro Preview 06-05",
+                "description": "Preview version of Gemini 2.5 Pro from June 5th",
+                "inputTokenLimit": 1048576,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-2.5-pro",
+                "version": "001",
+                "displayName": "Gemini 2.5 Pro",
+                "description": "Advanced multimodal model with enhanced capabilities",
+                "inputTokenLimit": 1048576,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-2.5-flash-preview-05-20",
+                "version": "001",
+                "displayName": "Gemini 2.5 Flash Preview 05-20",
+                "description": "Preview version of Gemini 2.5 Flash from May 20th",
+                "inputTokenLimit": 1048576,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-2.5-flash",
+                "version": "001",
+                "displayName": "Gemini 2.5 Flash",
+                "description": "Fast and efficient multimodal model with latest improvements",
+                "inputTokenLimit": 1048576,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-2.0-flash",
+                "version": "001",
+                "displayName": "Gemini 2.0 Flash",
+                "description": "Latest generation fast multimodal model",
+                "inputTokenLimit": 1048576,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-2.0-flash-preview-image-generation",
+                "version": "001",
+                "displayName": "Gemini 2.0 Flash Preview Image Generation",
+                "description": "Preview version with image generation capabilities",
+                "inputTokenLimit": 32000,
+                "outputTokenLimit": 8192,
+                "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
+                "temperature": 1.0,
+                "maxTemperature": 2.0,
+                "topP": 0.95,
+                "topK": 64
+            },
+            {
+                "name": "models/gemini-embedding-001",
+                "version": "001",
+                "displayName": "Gemini Embedding 001",
+                "description": "Text embedding model for semantic similarity and search",
+                "inputTokenLimit": 2048,
+                "outputTokenLimit": 1,
+                "supportedGenerationMethods": ["embedContent"],
+                "temperature": 0.0,
+                "maxTemperature": 0.0,
+                "topP": 1.0,
+                "topK": 1
+            }
+        ]
+    }
+
+    return Response(content=json.dumps(models_response), status_code=200, media_type="application/json; charset=utf-8")
+
 @app.options("/{full_path:path}")
 async def handle_preflight(request: Request, full_path: str):
     """Handle CORS preflight requests without authentication."""
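For a quick check of the new endpoint, something like the following works once the proxy is running; the base URL and the `key` query parameter are assumptions (authentication is whatever `authenticate_user` enforces), not details stated in this diff:

```python
import requests

BASE_URL = "http://localhost:8000"   # hypothetical local address of the proxy
PROXY_KEY = "my-proxy-key"           # hypothetical credential for authenticate_user

resp = requests.get(f"{BASE_URL}/v1/models", params={"key": PROXY_KEY})
resp.raise_for_status()
for model in resp.json()["models"]:
    print(model["name"], model["inputTokenLimit"], model["supportedGenerationMethods"])
```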
@@ -546,8 +704,10 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
     if not creds:
         print("❌ No credentials available")
         return Response(content="Authentication failed. Please restart the proxy to log in.", status_code=500)
+
+    print(f"Using credentials - Token: {creds.token[:20] if creds.token else 'None'}..., Expired: {creds.expired}")
 
+    # Check if credentials need refreshing (only when expired)
     if creds.expired and creds.refresh_token:
         print("Credentials expired. Refreshing...")
         try:
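The refresh path above relies on google-auth's standard flow (`Credentials.refresh` with a transport `Request`). Isolated, and with persistence injected as a callback, it looks roughly like this:

```python
from google.auth.transport.requests import Request as GoogleAuthRequest

def ensure_fresh(creds, persist):
    """Refresh expired OAuth credentials and persist the new token."""
    if creds.expired and creds.refresh_token:
        creds.refresh(GoogleAuthRequest())  # round trip to the OAuth token endpoint
        persist(creds)                      # e.g. save_credentials(creds)
    return creds
```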
@@ -589,6 +749,10 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
     # Remove our authentication parameters
     query_params.pop("key", None)
 
+    # For streaming requests, always ensure alt=sse is set
+    if is_streaming:
+        query_params["alt"] = "sse"
+
     # Add remaining query parameters to target URL if any
     if query_params:
         from urllib.parse import urlencode
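Setting `alt=sse` asks the upstream endpoint to stream server-sent events rather than a single JSON array. The surrounding URL assembly reduces to something like this (the endpoint string is a placeholder, not the proxy's real target):

```python
from urllib.parse import urlencode

target_url = "https://upstream.example/v1internal:streamGenerateContent"  # placeholder endpoint
query_params = {"alt": "sse"}  # forced for streaming requests

if query_params:
    target_url = f"{target_url}?{urlencode(query_params)}"

print(target_url)  # https://upstream.example/v1internal:streamGenerateContent?alt=sse
```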
@@ -631,7 +795,7 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
     except (json.JSONDecodeError, AttributeError):
         final_post_data = post_data
 
+    request_headers = {
         "Authorization": f"Bearer {creds.token}",
         "Content-Type": "application/json",
         "User-Agent": get_user_agent(),
@@ -642,157 +806,142 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
         try:
             print(f"[STREAM] Starting streaming request to: {target_url}")
             print(f"[STREAM] Request payload size: {len(final_post_data)} bytes")
+            print(f"[STREAM] Authorization header: Bearer {creds.token[:50]}...")
+            print(f"[STREAM] Full headers being sent: {request_headers}")
 
             # Make the initial streaming request
+            resp = requests.post(target_url, data=final_post_data, headers=request_headers, stream=True)
             print(f"[STREAM] Response status: {resp.status_code}")
             print(f"[STREAM] Response headers: {dict(resp.headers)}")
 
+            # If we get a 401, try refreshing the token once
+            if resp.status_code == 401:
+                print("[STREAM] Received 401 from Google API. Attempting token refresh...")
+                print(f"[STREAM] Response text: {resp.text}")
+
+                if creds.refresh_token:
+                    try:
+                        creds.refresh(GoogleAuthRequest())
+                        save_credentials(creds)
+                        print("[STREAM] Token refreshed successfully. Retrying request...")
+
+                        # Update headers with new token
+                        request_headers["Authorization"] = f"Bearer {creds.token}"
+
+                        # Retry the request with refreshed token
+                        resp = requests.post(target_url, data=final_post_data, headers=request_headers, stream=True)
+                        print(f"[STREAM] Retry response status: {resp.status_code}")
+
+                        if resp.status_code == 401:
+                            print("[STREAM] Still getting 401 after token refresh.")
+                            yield f'data: {{"error": {{"message": "Authentication failed even after token refresh. Please restart the proxy to re-authenticate."}}}}\n\n'
+                            return
+                    except Exception as refresh_error:
+                        print(f"[STREAM] Token refresh failed: {refresh_error}")
+                        yield f'data: {{"error": {{"message": "Token refresh failed. Please restart the proxy to re-authenticate."}}}}\n\n'
+                        return
+                else:
+                    print("[STREAM] No refresh token available.")
+                    yield f'data: {{"error": {{"message": "Authentication failed. Please restart the proxy to re-authenticate."}}}}\n\n'
                     return
 
             with resp:
                 resp.raise_for_status()
 
+                # Process exactly like the real Gemini SDK
+                print("[STREAM] Processing with Gemini SDK-compatible logic")
 
+                # Use iter_lines() exactly like the real Gemini SDK (without decode_unicode)
+                # This should be non-blocking and yield lines as they arrive
+                for chunk in resp.iter_lines():
+                    if chunk:
+                        # Decode UTF-8 if it's bytes (matching SDK logic exactly)
+                        if not isinstance(chunk, str):
+                            chunk = chunk.decode('utf-8')
+
+                        # Strip 'data: ' prefix if present (matching SDK logic)
+                        if chunk.startswith('data: '):
+                            chunk = chunk[len('data: '):]
 
+                        try:
+                            # Parse the JSON from Google's internal API
+                            obj = json.loads(chunk)
 
+                            # Convert Google's internal format to standard Gemini format
+                            if "response" in obj:
+                                response_chunk = obj["response"]
+                                # Output in standard Gemini streaming format
+                                response_json = json.dumps(response_chunk, separators=(',', ':'))
+                                yield f"data: {response_json}\n\n"
+                        except json.JSONDecodeError:
+                            # Skip invalid JSON
+                            continue
 
         except requests.exceptions.RequestException as e:
             print(f"Error during streaming request: {e}")
+            # Format error as real Gemini API would
+            yield f'data: {{"error": {{"message": "Upstream request failed: {str(e)}"}}}}\n\n'
         except Exception as e:
             print(f"An unexpected error occurred during streaming: {e}")
+            # Format error as real Gemini API would
+            yield f'data: {{"error": {{"message": "An unexpected error occurred: {str(e)}"}}}}\n\n'
+
+        # Create the streaming response with headers matching real Gemini API
+        response_headers = {
+            "Content-Type": "text/event-stream",
+            "Content-Disposition": "attachment",
+            "Vary": "Origin, X-Origin, Referer",
+            "X-XSS-Protection": "0",
+            "X-Frame-Options": "SAMEORIGIN",
+            "X-Content-Type-Options": "nosniff",
+            "Server": "ESF"
+        }
+
+        response = StreamingResponse(
+            stream_generator(),
+            media_type="text/event-stream",
+            headers=response_headers
+        )
+
+        return response
     else:
         # Make the request
+        print(f"[NON-STREAM] Starting request to: {target_url}")
+        print(f"[NON-STREAM] Authorization header: Bearer {creds.token[:50]}...")
+        print(f"[NON-STREAM] Full headers being sent: {request_headers}")
 
+        resp = requests.post(target_url, data=final_post_data, headers=request_headers)
+
+        print(f"[NON-STREAM] Response status: {resp.status_code}")
+        print(f"[NON-STREAM] Response headers: {dict(resp.headers)}")
+
+        # If we get a 401, try refreshing the token once
+        if resp.status_code == 401:
+            print("Received 401 from Google API. Attempting token refresh...")
+            print(f"Response text: {resp.text}")
+
+            if creds.refresh_token:
+                try:
+                    creds.refresh(GoogleAuthRequest())
+                    save_credentials(creds)
+                    print("Token refreshed successfully. Retrying request...")
+
+                    # Update headers with new token
+                    request_headers["Authorization"] = f"Bearer {creds.token}"
+
+                    # Retry the request with refreshed token
+                    resp = requests.post(target_url, data=final_post_data, headers=request_headers)
+                    print(f"Retry response status: {resp.status_code}")
+
+                    if resp.status_code == 401:
+                        print("Still getting 401 after token refresh.")
+                        return Response(content="Authentication failed even after token refresh. Please restart the proxy to re-authenticate.", status_code=500)
+                except Exception as refresh_error:
+                    print(f"Token refresh failed: {refresh_error}")
+                    return Response(content="Token refresh failed. Please restart the proxy to re-authenticate.", status_code=500)
+            else:
+                print("No refresh token available.")
+                return Response(content="Authentication failed. Please restart the proxy to re-authenticate.", status_code=500)
 
         if resp.status_code == 200:
             try:
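The new streaming loop is, in effect, a small SSE relay: read a line, drop the `data: ` prefix, parse the JSON, and re-emit only the nested `response` object in standard Gemini SSE framing. A self-contained sketch of that transform with made-up input (no network involved):

```python
import json

def relay_sse(lines):
    """Convert Google-internal SSE lines into standard Gemini SSE events."""
    for raw in lines:
        if not raw:
            continue
        line = raw.decode("utf-8") if isinstance(raw, bytes) else raw
        if line.startswith("data: "):
            line = line[len("data: "):]
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip keep-alives and partial fragments
        if "response" in obj:
            yield f"data: {json.dumps(obj['response'], separators=(',', ':'))}\n\n"

sample = [b'data: {"response": {"candidates": [{"content": {"parts": [{"text": "hi"}]}}]}}', b""]
print(list(relay_sse(sample)))
```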
@@ -800,7 +949,7 @@ async def proxy_request(request: Request, full_path: str, username: str = Depend
                 # The actual response is nested under the "response" key
                 standard_gemini_response = google_api_response.get("response")
                 # Return the response object directly, not wrapped in a list
+                return Response(content=json.dumps(standard_gemini_response), status_code=200, media_type="application/json; charset=utf-8")
             except (json.JSONDecodeError, AttributeError) as e:
                 print(f"Error converting to standard Gemini format: {e}")
                 # Fallback to sending the original content if conversion fails