import os
import logging
from typing import Optional
from datetime import datetime
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, Depends, Security, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import uvicorn

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global variables for the model
model = None
tokenizer = None
model_loaded = False
torch_available = False
@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: try to load the real LLM, otherwise fall back to smart responses
    global model, tokenizer, model_loaded, torch_available
    logger.info("Real LLM AI Assistant starting up...")

    try:
        # Try to import torch and transformers
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        torch_available = True
        logger.info("PyTorch and Transformers available!")

        # Use a better conversational model (small variant for better compatibility)
        model_name = os.getenv("MODEL_NAME", "microsoft/DialoGPT-small")
        logger.info(f"Loading real LLM model: {model_name}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model with optimizations
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            pad_token_id=tokenizer.eos_token_id
        )

        model_loaded = True
        logger.info("Real LLM model loaded successfully!")
    except ImportError as e:
        logger.warning(f"PyTorch/Transformers not available: {e}")
        logger.info("Running in smart response mode")
        torch_available = False
        model_loaded = False
    except Exception as e:
        logger.warning(f"Could not load LLM model: {e}")
        logger.info("Running in smart response mode")
        model_loaded = False

    yield

    # Shutdown
    logger.info("AI Assistant shutting down...")
# Initialize FastAPI app with lifespan
app = FastAPI(
    title="Real LLM AI Agent API",
    description="AI Agent powered by actual LLM models with fallback",
    version="4.1.0",
    lifespan=lifespan
)
# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Security
security = HTTPBearer()

# Configuration: map API keys (overridable via environment variables) to user names
API_KEYS = {
    os.getenv("API_KEY_1", "27Eud5J73j6SqPQAT2ioV-CtiCg-p0WNqq6I4U0Ig6E"): "user1",
    os.getenv("API_KEY_2", "QbzG2CqHU1Nn6F1EogZ1d3dp8ilRTMJQBwTJDQBzS-U"): "user2",
}
# Request/Response models
class ChatRequest(BaseModel):
    message: str = Field(..., min_length=1, max_length=2000)
    max_length: Optional[int] = Field(200, ge=50, le=500)
    temperature: Optional[float] = Field(0.8, ge=0.1, le=1.5)
    top_p: Optional[float] = Field(0.9, ge=0.1, le=1.0)
    do_sample: Optional[bool] = Field(True)

class ChatResponse(BaseModel):
    response: str
    model_used: str
    timestamp: str
    processing_time: float
    tokens_used: int
    model_loaded: bool

class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    timestamp: str
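# Illustrative /chat request body matching the ChatRequest fields above (the values
# shown are examples, not taken from the original source):
#   {
#     "message": "Explain machine learning in one paragraph",
#     "max_length": 200,
#     "temperature": 0.8,
#     "top_p": 0.9,
#     "do_sample": true
#   }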
def verify_api_key(credentials: HTTPAuthorizationCredentials = Security(security)) -> str:
    """Verify API key authentication"""
    api_key = credentials.credentials
    if api_key not in API_KEYS:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid API key"
        )
    return API_KEYS[api_key]
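# Clients authenticate with a standard HTTP Bearer header, e.g. (key value is a placeholder):
#   Authorization: Bearer <API_KEY_1>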
def get_smart_fallback_response(message: str) -> str:
    """Smart fallback responses when the LLM is not available"""
    message_lower = message.lower()

    if any(word in message_lower for word in ["hello", "hi", "hey", "hii"]):
        return """Hello! I'm your AI assistant. I'm currently running in smart mode while the full LLM model loads.

I can still help you with questions about:
• Machine Learning and AI concepts
• Programming and Python
• Data Science topics
• Technology explanations
• General conversations

What would you like to know about? I'll do my best to provide helpful information!"""

    elif any(word in message_lower for word in ["machine learning", "ml"]):
        return """Machine learning is a fascinating field! It's a subset of artificial intelligence where computers learn to make predictions or decisions by finding patterns in data, rather than being explicitly programmed for every scenario.

Key concepts:
• **Training**: The model learns from example data
• **Patterns**: It identifies relationships and trends
• **Prediction**: It applies learned patterns to new data
• **Improvement**: Performance gets better with more data

Common applications include recommendation systems (like Netflix suggestions), image recognition, natural language processing, and autonomous vehicles.

Would you like me to explain any specific aspect of machine learning in more detail?"""

    elif any(word in message_lower for word in ["ai", "artificial intelligence"]):
        return """Artificial Intelligence is the simulation of human intelligence in machines! It's about creating systems that can think, learn, and solve problems.

Current AI can:
• Understand and generate human language
• Recognize images and objects
• Play complex games at superhuman levels
• Drive cars autonomously
• Discover new medicines

Types of AI:
• **Narrow AI**: Specialized for specific tasks (what we have today)
• **General AI**: Human-level intelligence across all domains (future goal)
• **Super AI**: Beyond human intelligence (theoretical)

AI is transforming every industry and changing how we work, learn, and live. What aspect of AI interests you most?"""

    elif any(word in message_lower for word in ["python", "programming"]):
        return """Python is an excellent choice for AI and programming! It's known for its simple, readable syntax and powerful capabilities.

Why Python is great:
• **Easy to learn**: Clear, English-like syntax
• **Versatile**: Web development, AI, data science, automation
• **Rich ecosystem**: Thousands of libraries and frameworks
• **Community**: Large, helpful developer community

For AI/ML specifically:
• **NumPy**: Numerical computing
• **Pandas**: Data manipulation
• **Scikit-learn**: Machine learning algorithms
• **TensorFlow/PyTorch**: Deep learning

Python lets you focus on solving problems rather than wrestling with complex syntax. Are you interested in learning Python for a specific purpose?"""

    else:
        return f"""I understand you're asking about: "{message}"

I'm currently running in smart mode while the full LLM model loads. I can provide helpful information on topics like:
• **Technology**: AI, machine learning, programming
• **Science**: Data science, computer science concepts
• **Learning**: Programming languages, career advice
• **General**: Explanations, discussions, problem-solving

Could you be more specific about what you'd like to know? I'm here to help and will provide the most useful information I can!

If you're looking for creative writing, storytelling, or very specific technical details, the full LLM model will provide even better responses once it's loaded."""
def generate_llm_response(
    message: str,
    max_length: int = 200,
    temperature: float = 0.8,
    top_p: float = 0.9,
    do_sample: bool = True
) -> tuple:
    """Generate a response using the actual LLM model, or the smart fallback."""
    global model, tokenizer, model_loaded, torch_available

    if not torch_available:
        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())

    if not model_loaded or model is None or tokenizer is None:
        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())

    try:
        import torch

        # Prepare input with a simple conversation format
        input_text = f"Human: {message}\nAssistant:"

        # Tokenize input
        inputs = tokenizer.encode(input_text, return_tensors="pt")

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_length=inputs.shape[1] + max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_return_sequences=1,
                repetition_penalty=1.1,
                length_penalty=1.0
            )

        # Decode response
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the assistant's part of the generated text
        if "Assistant:" in response:
            response = response.split("Assistant:")[-1].strip()

        # Remove the input text if it's still there
        if input_text.replace("Assistant:", "").strip() in response:
            response = response.replace(input_text.replace("Assistant:", "").strip(), "").strip()

        # Clean up the response; fall back if generation produced nothing useful
        response = response.strip()
        if not response or len(response) < 10:
            return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())

        # Count tokens in the generated response
        tokens_used = len(tokenizer.encode(response))

        return response, os.getenv("MODEL_NAME", "microsoft/DialoGPT-small"), tokens_used

    except Exception as e:
        logger.error(f"Error generating LLM response: {str(e)}")
        return get_smart_fallback_response(message), "smart_fallback_mode", len(message.split())
# Route decorators were not present in the extracted source; the paths below
# ("/", "/health", "/chat", "/model/info") are conventional assumptions.
@app.get("/")
async def root():
    """Health check endpoint"""
    return HealthResponse(
        status="healthy",
        model_loaded=model_loaded,
        timestamp=datetime.now().isoformat()
    )

@app.get("/health")
async def health_check():
    """Detailed health check"""
    return HealthResponse(
        status="healthy" if model_loaded else "smart_mode",
        model_loaded=model_loaded,
        timestamp=datetime.now().isoformat()
    )
@app.post("/chat")
async def chat(
    request: ChatRequest,
    user: str = Depends(verify_api_key)
):
    """Main chat endpoint using the real LLM model or the smart fallback"""
    start_time = datetime.now()

    try:
        # Generate response using the actual LLM or the smart fallback
        response_text, model_used, tokens_used = generate_llm_response(
            request.message,
            request.max_length,
            request.temperature,
            request.top_p,
            request.do_sample
        )

        # Calculate processing time
        processing_time = (datetime.now() - start_time).total_seconds()

        return ChatResponse(
            response=response_text,
            model_used=model_used,
            timestamp=datetime.now().isoformat(),
            processing_time=processing_time,
            tokens_used=tokens_used,
            model_loaded=model_loaded
        )
    except Exception as e:
        logger.error(f"Error in chat endpoint: {str(e)}")
        # Even if there's an error, provide a helpful response
        return ChatResponse(
            response="I'm experiencing some technical difficulties, but I'm still here to help! Could you please try rephrasing your question?",
            model_used="error_recovery_mode",
            timestamp=datetime.now().isoformat(),
            processing_time=(datetime.now() - start_time).total_seconds(),
            tokens_used=0,
            model_loaded=model_loaded
        )
@app.get("/model/info")
async def get_model_info(user: str = Depends(verify_api_key)):
    """Get information about the loaded model"""
    return {
        "model_name": os.getenv("MODEL_NAME", "microsoft/DialoGPT-small"),
        "model_loaded": model_loaded,
        "torch_available": torch_available,
        "status": "active" if model_loaded else "smart_fallback_mode",
        "capabilities": [
            "Real LLM text generation" if model_loaded else "Smart fallback responses",
            "Conversational AI responses",
            "Dynamic response generation" if model_loaded else "Contextual smart responses",
            "Adjustable temperature and top_p" if model_loaded else "Fixed high-quality responses",
            "Natural language understanding"
        ],
        "version": "4.1.0",
        "type": "Real LLM Model" if model_loaded else "Smart Fallback Mode"
    }
if __name__ == "__main__":
    # For Hugging Face Spaces, default to port 7860
    port = int(os.getenv("PORT", "7860"))
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
        reload=False
    )
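
# Example client call (sketch): how a caller might hit the /chat endpoint once the
# service is running. The base URL, API key, and route path are placeholders /
# assumptions, not values taken from the original source.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       headers={"Authorization": "Bearer <your-api-key>"},
#       json={"message": "What is machine learning?", "max_length": 200},
#       timeout=120,
#   )
#   print(resp.json()["response"])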