sonyps1928 committed on
Commit e27c591 · 1 Parent(s): 1b3fa51
Files changed (2)
  1. app.py +104 -365
  2. requirements.txt +4 -6
app.py CHANGED
@@ -1,376 +1,115 @@
- import logging
- import time
- import random
- from typing import Dict, Any, List, Optional
- import uvicorn
- from fastapi import FastAPI, HTTPException, Depends, Request
- from fastapi.responses import JSONResponse
- from fastapi.exception_handlers import http_exception_handler
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
- from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
- import requests
- import json
-
- from config import config
-
- # Configure logging
- logging.basicConfig(level=getattr(logging, config.LOG_LEVEL))
- logger = logging.getLogger(__name__)
-
- # FastAPI app
- app = FastAPI(
-     title="Advanced Gemini Proxy",
-     description="OpenAI-compatible proxy for Google Gemini API",
-     version="1.0.0"
- )
-
- # CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Custom exception handler
- @app.exception_handler(HTTPException)
- async def custom_http_exception_handler(request: Request, exc: HTTPException):
-     # If detail is already in OpenAI format, return as-is
-     if isinstance(exc.detail, dict) and "error" in exc.detail:
-         return JSONResponse(
-             status_code=exc.status_code,
-             content=exc.detail
-         )
-
-     # Otherwise, format as OpenAI error
-     error_response = {
-         "error": {
-             "message": str(exc.detail),
-             "type": "api_error",
-             "param": None,
-             "code": None
-         }
-     }
-
-     return JSONResponse(
-         status_code=exc.status_code,
-         content=error_response
-     )
-
- # Security
- security = HTTPBearer()
-
- # Rate limiting storage (in-memory for simplicity)
- rate_limit_storage: Dict[str, List[float]] = {}
-
- # Pydantic models
- class ChatMessage(BaseModel):
-     role: str
-     content: str
-
- class ChatCompletionRequest(BaseModel):
-     model: str
-     messages: List[ChatMessage]
-     temperature: Optional[float] = 1.0
-     max_tokens: Optional[int] = None
-     stream: Optional[bool] = False
-
- class Choice(BaseModel):
-     index: int
-     message: Dict[str, str]
-     finish_reason: str
-
- class Usage(BaseModel):
-     prompt_tokens: int
-     completion_tokens: int
-     total_tokens: int
-
- class ChatCompletionResponse(BaseModel):
-     id: str
-     object: str = "chat.completion"
-     created: int
-     model: str
-     choices: List[Choice]
-     usage: Usage
-
- # Authentication
- async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
-     if credentials.credentials != config.MASTER_API_KEY:
-         error_response = {
-             "error": {
-                 "message": "Invalid API key provided",
-                 "type": "invalid_request_error",
-                 "param": None,
-                 "code": "invalid_api_key"
-             }
-         }
-         raise HTTPException(status_code=401, detail=error_response)
-     return credentials.credentials
-
- # Rate limiting
- def check_rate_limit(client_ip: str) -> tuple[bool, int]:
-     now = time.time()
-     if client_ip not in rate_limit_storage:
-         rate_limit_storage[client_ip] = []
-
-     # Clean old entries
-     rate_limit_storage[client_ip] = [
-         timestamp for timestamp in rate_limit_storage[client_ip]
-         if now - timestamp < 60
-     ]
-
-     current_count = len(rate_limit_storage[client_ip])
-
-     # Check limit
-     if current_count >= config.MAX_REQUESTS_PER_MINUTE:
-         # Calculate reset time
-         oldest_request = min(rate_limit_storage[client_ip])
-         reset_time = int(oldest_request + 60)
-         return False, reset_time
-
-     # Add current request
-     rate_limit_storage[client_ip].append(now)
-     return True, 0
-
- # Gemini API interaction
- def get_random_api_key() -> str:
-     return random.choice(config.GEMINI_API_KEYS)
-
- def convert_to_gemini_format(messages: List[ChatMessage]) -> List[Dict[str, Any]]:
-     gemini_messages = []
-     for msg in messages:
-         if msg.role == "system":
-             # Handle system messages by converting to user message with instruction
-             gemini_messages.append({
-                 "role": "user",
-                 "parts": [{"text": f"System instruction: {msg.content}"}]
-             })
-         else:
-             role = "user" if msg.role == "user" else "model"
-             gemini_messages.append({
-                 "role": role,
-                 "parts": [{"text": msg.content}]
-             })
-     return gemini_messages
-
- def estimate_tokens(text: str) -> int:
-     """Simple token estimation - roughly 1 token per 4 characters"""
-     return max(1, len(text) // 4)
-
- def call_gemini_api(messages: List[ChatMessage], model: str, temperature: float, max_tokens: Optional[int]) -> Dict[str, Any]:
-     api_key = get_random_api_key()
-
-     # Convert model name
-     if "gpt-4" in model.lower():
-         gemini_model = "gemini-1.5-pro-latest"
-     elif "gpt-3.5" in model.lower():
-         gemini_model = "gemini-1.5-flash-latest"
-     else:
-         gemini_model = "gemini-1.5-flash-latest"  # Default fallback
-
-     url = f"https://generativelanguage.googleapis.com/v1beta/models/{gemini_model}:generateContent"
-
-     # Convert messages
-     gemini_messages = convert_to_gemini_format(messages)
-
-     payload = {
-         "contents": gemini_messages,
-         "generationConfig": {
-             "temperature": max(0.0, min(2.0, temperature)),  # Clamp temperature
-         },
-         "safetySettings": [
-             {
-                 "category": "HARM_CATEGORY_HARASSMENT",
-                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
-             },
-             {
-                 "category": "HARM_CATEGORY_HATE_SPEECH",
-                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
-             },
-             {
-                 "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
-                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
-             },
-             {
-                 "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-                 "threshold": "BLOCK_MEDIUM_AND_ABOVE"
-             }
-         ]
-     }
-
-     if max_tokens and max_tokens > 0:
-         payload["generationConfig"]["maxOutputTokens"] = min(max_tokens, 8192)  # Gemini limit
-
-     headers = {
-         "Content-Type": "application/json",
-         "x-goog-api-key": api_key
-     }
-
-     try:
-         response = requests.post(url, json=payload, headers=headers, timeout=30)
-
-         if response.status_code != 200:
-             logger.error(f"Gemini API error: {response.status_code} - {response.text}")
-             error_response = {
-                 "error": {
-                     "message": f"Gemini API error: {response.text}",
-                     "type": "api_error",
-                     "param": None,
-                     "code": "gemini_api_error"
-                 }
-             }
-             raise HTTPException(status_code=response.status_code, detail=error_response)
-
-         return response.json()
-
-     except requests.exceptions.Timeout:
-         raise HTTPException(status_code=408, detail="Request timeout")
-     except requests.exceptions.RequestException as e:
-         logger.error(f"Request error: {str(e)}")
-         raise HTTPException(status_code=500, detail="Failed to connect to Gemini API")
-
- # Routes
- @app.get("/")
- async def root():
-     return {"message": "Advanced Gemini Proxy is running!", "version": "1.0.0"}
-
- @app.get("/health")
- async def health_check():
-     return {"status": "healthy", "timestamp": time.time()}
-
- @app.get("/v1/models")
- async def list_models(api_key: str = Depends(verify_api_key)):
-     return {
-         "object": "list",
-         "data": [
-             {
-                 "id": "gpt-3.5-turbo",
-                 "object": "model",
-                 "created": int(time.time()),
-                 "owned_by": "gemini-proxy"
-             },
-             {
-                 "id": "gpt-4",
-                 "object": "model",
-                 "created": int(time.time()),
-                 "owned_by": "gemini-proxy"
-             }
-         ]
-     }
-
- @app.post("/v1/chat/completions")
- async def chat_completions(
-     request: ChatCompletionRequest,
-     client_request: Request,
-     api_key: str = Depends(verify_api_key)
- ):
-     # Rate limiting
-     client_ip = client_request.client.host
-     allowed, reset_time = check_rate_limit(client_ip)
-     if not allowed:
-         error_response = {
-             "error": {
-                 "message": "Rate limit reached for requests",
-                 "type": "rate_limit_exceeded",
-                 "param": None,
-                 "code": "rate_limit_exceeded"
-             }
-         }
-         headers = {
-             "X-RateLimit-Limit": str(config.MAX_REQUESTS_PER_MINUTE),
-             "X-RateLimit-Remaining": "0",
-             "X-RateLimit-Reset": str(reset_time),
-             "Retry-After": str(60)
-         }
-         return JSONResponse(
-             status_code=429,
-             content=error_response,
-             headers=headers
-         )
-
-     # Validate request
-     if not request.messages:
-         error_response = {
-             "error": {
-                 "message": "Missing required parameter: 'messages'",
-                 "type": "invalid_request_error",
-                 "param": "messages",
-                 "code": "missing_required_parameter"
-             }
-         }
-         raise HTTPException(status_code=400, detail=error_response)
-
      try:
-         # Call Gemini API
-         gemini_response = call_gemini_api(
-             request.messages,
-             request.model,
-             request.temperature,
-             request.max_tokens
-         )
-
-         # Extract response text
-         if "candidates" not in gemini_response or not gemini_response["candidates"]:
-             # Check for blocked content
-             if "promptFeedback" in gemini_response and "blockReason" in gemini_response["promptFeedback"]:
-                 block_reason = gemini_response["promptFeedback"]["blockReason"]
-                 raise HTTPException(status_code=400, detail=f"Content blocked: {block_reason}")
-             raise HTTPException(status_code=500, detail="No response from Gemini API")
-
-         candidate = gemini_response["candidates"][0]
-
-         # Check if response was blocked
-         if "finishReason" in candidate and candidate["finishReason"] in ["SAFETY", "RECITATION"]:
-             raise HTTPException(status_code=400, detail=f"Response blocked: {candidate['finishReason']}")
-
-         if "content" not in candidate or "parts" not in candidate["content"]:
-             raise HTTPException(status_code=500, detail="Invalid response format from Gemini API")
-
-         response_text = candidate["content"]["parts"][0]["text"]
-
-         # Calculate token usage
-         prompt_text = " ".join([msg.content for msg in request.messages])
-         prompt_tokens = estimate_tokens(prompt_text)
-         completion_tokens = estimate_tokens(response_text)
-
-         # Convert to OpenAI format
-         response = ChatCompletionResponse(
-             id=f"chatcmpl-{int(time.time())}{random.randint(1000, 9999)}",
-             created=int(time.time()),
-             model=request.model,
-             choices=[Choice(
-                 index=0,
-                 message={
-                     "role": "assistant",
-                     "content": response_text
-                 },
-                 finish_reason="stop"
-             )],
-             usage=Usage(
-                 prompt_tokens=prompt_tokens,
-                 completion_tokens=completion_tokens,
-                 total_tokens=prompt_tokens + completion_tokens
              )
-         )
-
-         return response
-
-     except HTTPException:
-         raise
      except Exception as e:
-         logger.error(f"Unexpected error: {str(e)}")
-         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

  if __name__ == "__main__":
-     logger.info(f"🚀 Starting Advanced Gemini Proxy on {config.HOST}:{config.PORT}")
-     logger.info(f"🔑 Master API Key: {config.MASTER_API_KEY[:8]}...")
-     logger.info(f"🔧 Loaded {len(config.GEMINI_API_KEYS)} Gemini API key(s)")
-
-     uvicorn.run(
-         app,
-         host=config.HOST,
-         port=config.PORT,
-         log_level=config.LOG_LEVEL.lower()
-     )
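For reference, the proxy removed by this commit spoke the OpenAI chat-completions protocol (bearer auth against MASTER_API_KEY, /v1/models, /v1/chat/completions), so it could be exercised with a plain requests call. A minimal client sketch against that old interface; the host, port, and key below are placeholder assumptions, since the real values lived in config.py:

import requests

# Placeholder assumptions: the real host/port/key came from config.py.
BASE_URL = "http://localhost:8000"
MASTER_API_KEY = "sk-your-master-key"

resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    headers={"Authorization": f"Bearer {MASTER_API_KEY}"},
    json={
        "model": "gpt-3.5-turbo",  # the proxy mapped this name to gemini-1.5-flash-latest
        "messages": [{"role": "user", "content": "Hello!"}],
        "temperature": 0.7,
    },
    timeout=30,
)
resp.raise_for_status()
# Response follows the OpenAI shape built by ChatCompletionResponse above.
print(resp.json()["choices"][0]["message"]["content"])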
 
+ import gradio as gr
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
+ import torch
+
+ # Load model and tokenizer (using smaller GPT-2 for free tier)
+ model_name = "gpt2"  # You can also use "gpt2-medium" if it fits in memory
+ tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+ model = GPT2LMHeadModel.from_pretrained(model_name)
+
+ # Set pad token
+ tokenizer.pad_token = tokenizer.eos_token
+
+ def generate_text(prompt, max_length=100, temperature=0.7, top_p=0.9, top_k=50):
+     """Generate text using GPT-2"""
      try:
+         # Encode input
+         inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
+
+         # Generate
+         with torch.no_grad():
+             outputs = model.generate(
+                 inputs,
+                 max_length=min(max_length + len(inputs[0]), 512),  # Limit total length
+                 temperature=temperature,
+                 top_p=top_p,
+                 top_k=top_k,
+                 do_sample=True,
+                 pad_token_id=tokenizer.eos_token_id,
+                 num_return_sequences=1
              )
+
+         # Decode output
+         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         # Return only the new generated part
+         return generated_text[len(prompt):].strip()
+
      except Exception as e:
+         return f"Error generating text: {str(e)}"
+
+ # Create Gradio interface
+ with gr.Blocks(title="GPT-2 Text Generator") as demo:
+     gr.Markdown("# GPT-2 Text Generation Server")
+     gr.Markdown("Enter a prompt and generate text using GPT-2. Free tier optimized!")
+
+     with gr.Row():
+         with gr.Column():
+             prompt_input = gr.Textbox(
+                 label="Prompt",
+                 placeholder="Enter your text prompt here...",
+                 lines=3
+             )
+
+             with gr.Row():
+                 max_length = gr.Slider(
+                     minimum=10,
+                     maximum=200,
+                     value=100,
+                     step=10,
+                     label="Max Length"
+                 )
+                 temperature = gr.Slider(
+                     minimum=0.1,
+                     maximum=2.0,
+                     value=0.7,
+                     step=0.1,
+                     label="Temperature"
+                 )
+
+             with gr.Row():
+                 top_p = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.9,
+                     step=0.1,
+                     label="Top-p"
+                 )
+                 top_k = gr.Slider(
+                     minimum=1,
+                     maximum=100,
+                     value=50,
+                     step=1,
+                     label="Top-k"
+                 )
+
+             generate_btn = gr.Button("Generate Text", variant="primary")
+
+         with gr.Column():
+             output_text = gr.Textbox(
+                 label="Generated Text",
+                 lines=10,
+                 placeholder="Generated text will appear here..."
+             )
+
+     # Examples
+     gr.Examples(
+         examples=[
+             ["Once upon a time in a distant galaxy,"],
+             ["The future of artificial intelligence is"],
+             ["In the heart of the ancient forest,"],
+             ["The detective walked into the room and noticed"],
+         ],
+         inputs=prompt_input
+     )
+
+     # Connect the function
+     generate_btn.click(
+         fn=generate_text,
+         inputs=[prompt_input, max_length, temperature, top_p, top_k],
+         outputs=output_text
+     )

+ # Launch the app
  if __name__ == "__main__":
+     demo.launch()
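With the rewrite in place, the Space can be driven programmatically as well as through the UI. A minimal sketch using gradio_client (a separate install, not in requirements.txt); the Space ID below is an assumption, and fn_index=0 targets the single click handler since no api_name is registered:

from gradio_client import Client  # pip install gradio_client

client = Client("sonyps1928/gpt2-server")  # hypothetical Space ID

# Positional args mirror the click handler's inputs:
# prompt, max_length, temperature, top_p, top_k
result = client.predict(
    "Once upon a time in a distant galaxy,",
    100,   # max_length
    0.7,   # temperature
    0.9,   # top_p
    50,    # top_k
    fn_index=0,  # first (and only) registered event
)
print(result)

Registering the click event with an explicit api_name would make named-endpoint access sturdier than fn_index.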
 
requirements.txt CHANGED
@@ -1,6 +1,4 @@
- fastapi==0.104.1
- uvicorn[standard]==0.24.0
- requests==2.31.0
- python-multipart==0.0.6
- pydantic==2.5.0
- python-dotenv==1.0.0
+ gradio>=3.50.0
+ transformers>=4.30.0
+ torch>=2.0.0
+ tokenizers>=0.13.0
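A quick sanity check that the runtime satisfies the new minimum pins; a standard-library sketch, with package names taken from the file above:

from importlib.metadata import version

# Minimum versions pinned in requirements.txt
pins = {"gradio": "3.50.0", "transformers": "4.30.0", "torch": "2.0.0", "tokenizers": "0.13.0"}

for pkg, minimum in pins.items():
    installed = version(pkg)
    base = installed.split("+")[0]  # drop local tags like "+cpu" on torch builds
    # Naive numeric compare; fine for plain X.Y.Z versions like these.
    ok = tuple(int(p) for p in base.split(".")[:3]) >= tuple(int(p) for p in minimum.split("."))
    print(f"{pkg}: {installed} ({'OK' if ok else 'below ' + minimum})")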