sonyps1928 committed on
Commit
1b3fa51
·
1 Parent(s): adb694f

update app16

Files changed (2)
  1. app.py +362 -159
  2. requirements.txt +6 -4
app.py CHANGED
@@ -1,173 +1,376 @@
- import streamlit as st
- import os
- from transformers import GPT2LMHeadModel, GPT2Tokenizer
- import torch
-
- # ----------------------------
- # Page config
- # ----------------------------
- st.set_page_config(
-     page_title="GPT-2 Text Generator",
-     page_icon="🤖",
-     layout="wide"
  )
-
- # ----------------------------
- # Load environment variables
- # ----------------------------
- HF_TOKEN = os.getenv("HF_TOKEN")
- API_KEY = os.getenv("API_KEY")
- ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
-
- # ----------------------------
- # Model loading
- # ----------------------------
- @st.cache_resource
- def load_model():
-     """Load and cache the GPT-2 model"""
-     with st.spinner("Loading GPT-2 model..."):
-         try:
-             tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
-             model = GPT2LMHeadModel.from_pretrained("gpt2")
-             tokenizer.pad_token = tokenizer.eos_token
-             return tokenizer, model
-         except Exception as e:
-             st.error(f"Error loading model: {e}")
-             return None, None
-
- # ----------------------------
- # Text generation
- # ----------------------------
- def generate_text(prompt, max_length, temperature, tokenizer, model):
-     """Generate text using GPT-2"""
-     if not prompt:
-         return "Please enter a prompt"
-
-     if len(prompt) > 500:
-         return "Prompt too long (max 500 characters)"
-
-     try:
-         inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=300, truncation=True)
-
-         with torch.no_grad():
-             outputs = model.generate(
-                 inputs,
-                 max_length=inputs.shape[1] + max_length,
-                 temperature=temperature,
-                 do_sample=True,
-                 pad_token_id=tokenizer.eos_token_id,
-                 eos_token_id=tokenizer.eos_token_id,
-                 no_repeat_ngram_size=2
-             )
-
-         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         new_text = generated_text[len(prompt):].strip()
-
-         return new_text if new_text else "No text generated. Try a different prompt."
-
-     except Exception as e:
-         return f"Error generating text: {str(e)}"
-
- # ----------------------------
  # Authentication
- # ----------------------------
- def check_auth():
-     """Handle authentication"""
-     if ADMIN_PASSWORD:
-         if "authenticated" not in st.session_state:
-             st.session_state.authenticated = False
-
-         if not st.session_state.authenticated:
-             st.title("🔒 Authentication Required")
-             password = st.text_input("Enter admin password:", type="password")
-             if st.button("Login"):
-                 if password == ADMIN_PASSWORD:
-                     st.session_state.authenticated = True
-                     st.experimental_rerun()
-                 else:
-                     st.error("Invalid password")
-             return False
-     return True
-
- # ----------------------------
- # Main UI
- # ----------------------------
- def main():
-     if not check_auth():
-         return
-
-     tokenizer, model = load_model()
-     if tokenizer is None or model is None:
-         st.error("Failed to load model. Please check the logs.")
-         return
-
-     st.title("🤖 GPT-2 Text Generator")
-     st.markdown("Generate text using GPT-2 language model")
-
-     # Security status
-     col1, col2, col3 = st.columns(3)
-     with col1:
-         st.success("🔑 HF Token: Active" if HF_TOKEN else "🔑 HF Token: Not set")
-     with col2:
-         st.success("🔒 API Auth: Enabled" if API_KEY else "🔒 API Auth: Disabled")
-     with col3:
-         st.success("👀 Admin Auth: Active" if ADMIN_PASSWORD else "👀 Admin Auth: Disabled")
-
-     # Input section
-     st.subheader("📝 Input")
-     col1, col2 = st.columns([2, 1])
-
-     with col1:
-         prompt = st.text_area(
-             "Enter your prompt:",
-             placeholder="Type your text here...",
-             height=100
-         )
-         api_key = ""
-         if API_KEY:
-             api_key = st.text_input("API Key:", type="password")
-
-     with col2:
-         st.subheader("⚙️ Settings")
-         max_length = st.slider("Max Length", 20, 200, 100, 10)
-         temperature = st.slider("Temperature", 0.1, 1.5, 0.7, 0.1)
-         generate_btn = st.button("🚀 Generate Text", type="primary")
-
-     # API key validation
-     if API_KEY and generate_btn:
-         if not api_key or api_key != API_KEY:
-             st.error("🔒 Invalid or missing API key")
-             return
-
-     # Generate text
-     if generate_btn and prompt:
-         with st.spinner("Generating text..."):
-             result = generate_text(prompt, max_length, temperature, tokenizer, model)
-         st.subheader("📄 Generated Text")
-         st.text_area("Output:", value=result, height=200)
-         st.code(result)
-     elif generate_btn:
-         st.warning("Please enter a prompt")
-
-     # Example prompts
-     st.subheader("💡 Example Prompts")
-     examples = [
-         "Once upon a time in a distant galaxy,",
-         "The future of artificial intelligence is",
-         "In the heart of the ancient forest,",
-         "The detective walked into the room and noticed"
      ]
-
-     cols = st.columns(len(examples))
-     for i, example in enumerate(examples):
-         with cols[i]:
-             if st.button(f"Use Example {i+1}", key=f"ex_{i}"):
-                 st.session_state.example_prompt = example
-                 st.experimental_rerun()
-
-     if hasattr(st.session_state, 'example_prompt'):
-         st.info(f"Example selected: {st.session_state.example_prompt}")
-
  if __name__ == "__main__":
-     main()

+ import logging
+ import time
+ import random
+ from typing import Dict, Any, List, Optional
+ import uvicorn
+ from fastapi import FastAPI, HTTPException, Depends, Request
+ from fastapi.responses import JSONResponse
+ from fastapi.exception_handlers import http_exception_handler
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ import requests
+ import json
+
+ from config import config
+
+ # Configure logging
+ logging.basicConfig(level=getattr(logging, config.LOG_LEVEL))
+ logger = logging.getLogger(__name__)
+
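The config module imported above is not included in this commit. A minimal sketch of what it plausibly looks like, assuming values come from environment variables via python-dotenv (which the new requirements.txt pins); the attribute names are taken from their uses in app.py, but every default below is illustrative only:

# config.py -- hypothetical sketch, not shipped in this commit
import os
from dotenv import load_dotenv

load_dotenv()  # pick up a local .env file if one exists

class Config:
    LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
    MASTER_API_KEY = os.getenv("MASTER_API_KEY", "change-me")
    # comma-separated list, e.g. GEMINI_API_KEYS="key1,key2"
    GEMINI_API_KEYS = [k.strip() for k in os.getenv("GEMINI_API_KEYS", "").split(",") if k.strip()]
    MAX_REQUESTS_PER_MINUTE = int(os.getenv("MAX_REQUESTS_PER_MINUTE", "60"))
    HOST = os.getenv("HOST", "0.0.0.0")
    PORT = int(os.getenv("PORT", "7860"))

config = Config()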
+ # FastAPI app
+ app = FastAPI(
+     title="Advanced Gemini Proxy",
+     description="OpenAI-compatible proxy for Google Gemini API",
+     version="1.0.0"
  )

+ # CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Custom exception handler
+ @app.exception_handler(HTTPException)
+ async def custom_http_exception_handler(request: Request, exc: HTTPException):
+     # If detail is already in OpenAI format, return as-is
+     if isinstance(exc.detail, dict) and "error" in exc.detail:
+         return JSONResponse(
+             status_code=exc.status_code,
+             content=exc.detail
+         )
+
+     # Otherwise, format as OpenAI error
+     error_response = {
+         "error": {
+             "message": str(exc.detail),
+             "type": "api_error",
+             "param": None,
+             "code": None
+         }
+     }
+
+     return JSONResponse(
+         status_code=exc.status_code,
+         content=error_response
+     )
+
+ # Security
+ security = HTTPBearer()
+
+ # Rate limiting storage (in-memory for simplicity)
+ rate_limit_storage: Dict[str, List[float]] = {}
+
+ # Pydantic models
+ class ChatMessage(BaseModel):
+     role: str
+     content: str
+
+ class ChatCompletionRequest(BaseModel):
+     model: str
+     messages: List[ChatMessage]
+     temperature: Optional[float] = 1.0
+     max_tokens: Optional[int] = None
+     stream: Optional[bool] = False
+
+ class Choice(BaseModel):
+     index: int
+     message: Dict[str, str]
+     finish_reason: str
+
+ class Usage(BaseModel):
+     prompt_tokens: int
+     completion_tokens: int
+     total_tokens: int
+
+ class ChatCompletionResponse(BaseModel):
+     id: str
+     object: str = "chat.completion"
+     created: int
+     model: str
+     choices: List[Choice]
+     usage: Usage
+
  # Authentication
+ async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
+     if credentials.credentials != config.MASTER_API_KEY:
+         error_response = {
+             "error": {
+                 "message": "Invalid API key provided",
+                 "type": "invalid_request_error",
+                 "param": None,
+                 "code": "invalid_api_key"
+             }
+         }
+         raise HTTPException(status_code=401, detail=error_response)
+     return credentials.credentials
+
+ # Rate limiting
+ def check_rate_limit(client_ip: str) -> tuple[bool, int]:
+     now = time.time()
+     if client_ip not in rate_limit_storage:
+         rate_limit_storage[client_ip] = []
+
+     # Clean old entries
+     rate_limit_storage[client_ip] = [
+         timestamp for timestamp in rate_limit_storage[client_ip]
+         if now - timestamp < 60
      ]
+
+     current_count = len(rate_limit_storage[client_ip])
+
+     # Check limit
+     if current_count >= config.MAX_REQUESTS_PER_MINUTE:
+         # Calculate reset time
+         oldest_request = min(rate_limit_storage[client_ip])
+         reset_time = int(oldest_request + 60)
+         return False, reset_time
+
+     # Add current request
+     rate_limit_storage[client_ip].append(now)
+     return True, 0
+
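check_rate_limit implements a per-IP sliding 60-second window: every accepted request is timestamped, stale timestamps are dropped on each call, and the reported reset time is when the oldest surviving entry ages out. A self-contained re-creation of the same logic, with the limit passed in so it runs without config.py (demo values are hypothetical):

window: list[float] = []  # timestamps of accepted requests for one client

def allowed(now: float, limit: int = 2) -> bool:
    global window
    window = [t for t in window if now - t < 60]  # drop entries older than 60s
    if len(window) >= limit:
        return False  # over the limit; the caller turns this into a 429
    window.append(now)
    return True

assert allowed(0.0) and allowed(1.0)  # first two requests pass
assert not allowed(2.0)               # third inside the window is rejected
assert allowed(61.0)                  # both old entries aged out after 60s

Since the window is keyed on client IP and stored in process memory, clients behind one NAT share a budget and all counters reset on restart.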
+ # Gemini API interaction
+ def get_random_api_key() -> str:
+     return random.choice(config.GEMINI_API_KEYS)
+
+ def convert_to_gemini_format(messages: List[ChatMessage]) -> List[Dict[str, Any]]:
+     gemini_messages = []
+     for msg in messages:
+         if msg.role == "system":
+             # Handle system messages by converting to user message with instruction
+             gemini_messages.append({
+                 "role": "user",
+                 "parts": [{"text": f"System instruction: {msg.content}"}]
+             })
+         else:
+             role = "user" if msg.role == "user" else "model"
+             gemini_messages.append({
+                 "role": role,
+                 "parts": [{"text": msg.content}]
+             })
+     return gemini_messages
+
+ def estimate_tokens(text: str) -> int:
+     """Simple token estimation - roughly 1 token per 4 characters"""
+     return max(1, len(text) // 4)
+
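To make the role mapping concrete, here is how an OpenAI-style conversation comes out of convert_to_gemini_format (input shown as plain dicts for brevity; the function itself receives ChatMessage models):

# Input (OpenAI wire format):
#   [{"role": "system",    "content": "Be terse."},
#    {"role": "user",      "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"}]
#
# Output of convert_to_gemini_format:
#   [{"role": "user",  "parts": [{"text": "System instruction: Be terse."}]},
#    {"role": "user",  "parts": [{"text": "Hi"}]},
#    {"role": "model", "parts": [{"text": "Hello!"}]}]

The code folds system messages into a user turn with a "System instruction:" prefix rather than using a separate system field; any role other than "user" maps to "model".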
+ def call_gemini_api(messages: List[ChatMessage], model: str, temperature: float, max_tokens: Optional[int]) -> Dict[str, Any]:
+     api_key = get_random_api_key()
+
+     # Convert model name
+     if "gpt-4" in model.lower():
+         gemini_model = "gemini-1.5-pro-latest"
+     elif "gpt-3.5" in model.lower():
+         gemini_model = "gemini-1.5-flash-latest"
+     else:
+         gemini_model = "gemini-1.5-flash-latest"  # Default fallback
+
+     url = f"https://generativelanguage.googleapis.com/v1beta/models/{gemini_model}:generateContent"
+
+     # Convert messages
+     gemini_messages = convert_to_gemini_format(messages)
+
+     payload = {
+         "contents": gemini_messages,
+         "generationConfig": {
+             "temperature": max(0.0, min(2.0, temperature)),  # Clamp temperature
+         },
+         "safetySettings": [
+             {
+                 "category": "HARM_CATEGORY_HARASSMENT",
+                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+             },
+             {
+                 "category": "HARM_CATEGORY_HATE_SPEECH",
+                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+             },
+             {
+                 "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+             },
+             {
+                 "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
+             }
+         ]
+     }
+
+     if max_tokens and max_tokens > 0:
+         payload["generationConfig"]["maxOutputTokens"] = min(max_tokens, 8192)  # Gemini limit
+
+     headers = {
+         "Content-Type": "application/json",
+         "x-goog-api-key": api_key
+     }
+
+     try:
+         response = requests.post(url, json=payload, headers=headers, timeout=30)
+
+         if response.status_code != 200:
+             logger.error(f"Gemini API error: {response.status_code} - {response.text}")
+             error_response = {
+                 "error": {
+                     "message": f"Gemini API error: {response.text}",
+                     "type": "api_error",
+                     "param": None,
+                     "code": "gemini_api_error"
+                 }
+             }
+             raise HTTPException(status_code=response.status_code, detail=error_response)
+
+         return response.json()
+
+     except requests.exceptions.Timeout:
+         raise HTTPException(status_code=408, detail="Request timeout")
+     except requests.exceptions.RequestException as e:
+         logger.error(f"Request error: {str(e)}")
+         raise HTTPException(status_code=500, detail="Failed to connect to Gemini API")
+
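get_random_api_key spreads traffic across the configured keys only statistically. Where even rotation matters, a round-robin variant is a small change (an alternative sketch, not what this commit ships):

from itertools import cycle

_key_cycle = cycle(config.GEMINI_API_KEYS)  # repeats the key list forever

def get_next_api_key() -> str:
    # Round-robin instead of random.choice; not synchronized across processes.
    return next(_key_cycle)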
+ # Routes
+ @app.get("/")
+ async def root():
+     return {"message": "Advanced Gemini Proxy is running!", "version": "1.0.0"}
+
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy", "timestamp": time.time()}
+
+ @app.get("/v1/models")
+ async def list_models(api_key: str = Depends(verify_api_key)):
+     return {
+         "object": "list",
+         "data": [
+             {
+                 "id": "gpt-3.5-turbo",
+                 "object": "model",
+                 "created": int(time.time()),
+                 "owned_by": "gemini-proxy"
+             },
+             {
+                 "id": "gpt-4",
+                 "object": "model",
+                 "created": int(time.time()),
+                 "owned_by": "gemini-proxy"
+             }
+         ]
+     }
+
+ @app.post("/v1/chat/completions")
+ async def chat_completions(
+     request: ChatCompletionRequest,
+     client_request: Request,
+     api_key: str = Depends(verify_api_key)
+ ):
+     # Rate limiting
+     client_ip = client_request.client.host
+     allowed, reset_time = check_rate_limit(client_ip)
+     if not allowed:
+         error_response = {
+             "error": {
+                 "message": "Rate limit reached for requests",
+                 "type": "rate_limit_exceeded",
+                 "param": None,
+                 "code": "rate_limit_exceeded"
+             }
+         }
+         headers = {
+             "X-RateLimit-Limit": str(config.MAX_REQUESTS_PER_MINUTE),
+             "X-RateLimit-Remaining": "0",
+             "X-RateLimit-Reset": str(reset_time),
+             "Retry-After": str(60)
+         }
+         return JSONResponse(
+             status_code=429,
+             content=error_response,
+             headers=headers
+         )
+
+     # Validate request
+     if not request.messages:
+         error_response = {
+             "error": {
+                 "message": "Missing required parameter: 'messages'",
+                 "type": "invalid_request_error",
+                 "param": "messages",
+                 "code": "missing_required_parameter"
+             }
+         }
+         raise HTTPException(status_code=400, detail=error_response)
+
+     try:
+         # Call Gemini API
+         gemini_response = call_gemini_api(
+             request.messages,
+             request.model,
+             request.temperature,
+             request.max_tokens
+         )
+
+         # Extract response text
+         if "candidates" not in gemini_response or not gemini_response["candidates"]:
+             # Check for blocked content
+             if "promptFeedback" in gemini_response and "blockReason" in gemini_response["promptFeedback"]:
+                 block_reason = gemini_response["promptFeedback"]["blockReason"]
+                 raise HTTPException(status_code=400, detail=f"Content blocked: {block_reason}")
+             raise HTTPException(status_code=500, detail="No response from Gemini API")
+
+         candidate = gemini_response["candidates"][0]
+
+         # Check if response was blocked
+         if "finishReason" in candidate and candidate["finishReason"] in ["SAFETY", "RECITATION"]:
+             raise HTTPException(status_code=400, detail=f"Response blocked: {candidate['finishReason']}")
+
+         if "content" not in candidate or "parts" not in candidate["content"]:
+             raise HTTPException(status_code=500, detail="Invalid response format from Gemini API")
+
+         response_text = candidate["content"]["parts"][0]["text"]
+
+         # Calculate token usage
+         prompt_text = " ".join([msg.content for msg in request.messages])
+         prompt_tokens = estimate_tokens(prompt_text)
+         completion_tokens = estimate_tokens(response_text)
+
+         # Convert to OpenAI format
+         response = ChatCompletionResponse(
+             id=f"chatcmpl-{int(time.time())}{random.randint(1000, 9999)}",
+             created=int(time.time()),
+             model=request.model,
+             choices=[Choice(
+                 index=0,
+                 message={
+                     "role": "assistant",
+                     "content": response_text
+                 },
+                 finish_reason="stop"
+             )],
+             usage=Usage(
+                 prompt_tokens=prompt_tokens,
+                 completion_tokens=completion_tokens,
+                 total_tokens=prompt_tokens + completion_tokens
+             )
+         )
+
+         return response
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Unexpected error: {str(e)}")
+         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
+
  if __name__ == "__main__":
+     logger.info(f"🚀 Starting Advanced Gemini Proxy on {config.HOST}:{config.PORT}")
+     logger.info(f"🔑 Master API Key: {config.MASTER_API_KEY[:8]}...")
+     logger.info(f"🔧 Loaded {len(config.GEMINI_API_KEYS)} Gemini API key(s)")
+
+     uvicorn.run(
+         app,
+         host=config.HOST,
+         port=config.PORT,
+         log_level=config.LOG_LEVEL.lower()
+     )
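Because the proxy mirrors the OpenAI chat-completions shape, any OpenAI-style client can talk to it. A minimal smoke test with requests, assuming a local server on port 7860 and the configured master key (both values here are placeholders):

import requests

resp = requests.post(
    "http://localhost:7860/v1/chat/completions",  # host/port are assumptions
    headers={"Authorization": "Bearer <MASTER_API_KEY>"},
    json={
        "model": "gpt-3.5-turbo",  # routed to gemini-1.5-flash-latest above
        "messages": [{"role": "user", "content": "Say hello."}],
        "temperature": 0.7,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])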
requirements.txt CHANGED
@@ -1,4 +1,6 @@
- streamlit==1.28.1
- transformers==4.44.2
- torch==2.4.1
- tokenizers==0.19.1
+ fastapi==0.104.1
+ uvicorn[standard]==0.24.0
+ requests==2.31.0
+ python-multipart==0.0.6
+ pydantic==2.5.0
+ python-dotenv==1.0.0
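With these pins the proxy starts from a plain interpreter run, since app.py invokes uvicorn itself in its __main__ block (assuming a populated .env as sketched in the config note above):

pip install -r requirements.txt
python app.py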