import os
import time
import gc
import re
import random
from typing import Optional

import uvicorn
import torch
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel
from transformers import pipeline, set_seed
# Initialize FastAPI
app = FastAPI(title="Character AI Chat - CPU Optimized Backend")

# CORS middleware for a separately hosted frontend
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serve the frontend; assumed to live at the root path
@app.get("/")
async def serve_frontend():
    return FileResponse("index.html")
# Set a seed for reproducible sampling
set_seed(42)

# Configuration for the 11 CPU-optimized models
MODELS = {
    "distil-gpt-2": {
        "name": "DistilGPT-2 ⚡",
        "model_path": "Lyon28/Distil_GPT-2",
        "task": "text-generation",
        "max_tokens": 35,
        "priority": 1
    },
    "gpt-2-tinny": {
        "name": "GPT-2 Tinny ⚡",
        "model_path": "Lyon28/GPT-2-Tinny",
        "task": "text-generation",
        "max_tokens": 30,
        "priority": 1
    },
    "bert-tinny": {
        "name": "BERT Tinny 🎭",
        "model_path": "Lyon28/Bert-Tinny",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 1
    },
    "distilbert-base-uncased": {
        "name": "DistilBERT 🎭",
        "model_path": "Lyon28/Distilbert-Base-Uncased",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 1
    },
    "albert-base-v2": {
        "name": "ALBERT Base 🎭",
        "model_path": "Lyon28/Albert-Base-V2",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 2
    },
    "electra-small": {
        "name": "ELECTRA Small 🎭",
        "model_path": "Lyon28/Electra-Small",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 2
    },
    "t5-small": {
        "name": "T5 Small 🔄",
        "model_path": "Lyon28/T5-Small",
        "task": "text2text-generation",
        "max_tokens": 40,
        "priority": 2
    },
    "gpt-2": {
        "name": "GPT-2 Standard",
        "model_path": "Lyon28/GPT-2",
        "task": "text-generation",
        "max_tokens": 45,
        "priority": 2
    },
    "tinny-llama": {
        "name": "Tinny Llama",
        "model_path": "Lyon28/Tinny-Llama",
        "task": "text-generation",
        "max_tokens": 50,
        "priority": 3
    },
    "pythia": {
        "name": "Pythia",
        "model_path": "Lyon28/Pythia",
        "task": "text-generation",
        "max_tokens": 50,
        "priority": 3
    },
    "gpt-neo": {
        "name": "GPT-Neo",
        "model_path": "Lyon28/GPT-Neo",
        "task": "text-generation",
        "max_tokens": 55,
        "priority": 3
    }
}
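
# Note: "max_tokens" of 0 flags the classification models, which never
# generate text; lower "priority" values appear to mark the lighter models.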
class ChatRequest(BaseModel):
    message: str
    model: Optional[str] = "distil-gpt-2"
    situation: Optional[str] = "Santai"
    location: Optional[str] = "Ruang tamu"
    char_name: Optional[str] = "Sayang"
    user_name: Optional[str] = "Kamu"
    max_length: Optional[int] = 150
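
# Example request body (illustrative); omitted fields take the defaults above:
#   {"message": "Halo sayang, tempat ini indah ya",
#    "model": "distil-gpt-2", "situation": "Santai",
#    "location": "Ruang tamu", "char_name": "Sayang", "user_name": "Kamu"}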
# Character AI response templates
CHARACTER_TEMPLATES = {
    "romantic": [
        "iya sayang, {context}. Apakah kamu merasa nyaman di sini?",
        "tentu saja, {context}. Aku senang bisa bersama kamu seperti ini.",
        "benar sekali, {context}. Rasanya damai ya berada di sini bersama.",
        "hmm iya, {context}. Kamu selalu membuatku merasa bahagia.",
        "ya sayang, {context}. Momen seperti ini sangat berharga untukku."
    ],
    "casual": [
        "iya, {context}. Suasananya memang enak banget.",
        "betul juga, {context}. Aku juga merasa santai di sini.",
        "ya ampun, {context}. Seneng deh bisa kayak gini.",
        "hmm iya, {context}. Bikin pikiran jadi tenang.",
        "benar banget, {context}. Cocok buat santai-santai."
    ],
    "caring": [
        "iya, {context}. Kamu baik-baik saja kan?",
        "ya, {context}. Semoga kamu merasa nyaman.",
        "betul, {context}. Aku harap kamu senang.",
        "hmm, {context}. Apakah kamu butuh sesuatu?",
        "iya sayang, {context}. Jangan sungkan bilang kalau butuh apa-apa."
    ],
    "friendly": [
        "wah iya, {context}. Keren banget ya!",
        "bener tuh, {context}. Asik banget suasananya.",
        "iya dong, {context}. Mantep deh!",
        "setuju banget, {context}. Bikin happy.",
        "ya ampun, {context}. Seru banget ini!"
    ]
}
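
# Illustrative use of a template: the {context} slot takes a short phrase
# built from the user's message, e.g.
#   CHARACTER_TEMPLATES["casual"][0].format(context="memang indah sekali")
#   -> "iya, memang indah sekali. Suasananya memang enak banget."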
def create_character_prompt(user_input: str, situation: str, location: str, char_name: str, user_name: str) -> str:
    """Create a character-AI-style prompt."""
    clean_input = user_input.replace("{{User}}", user_name).replace("{{Char}}", char_name)
    prompt = f"""Situasi: {situation}
Latar: {location}
{user_name}: {clean_input}
{char_name}: """
    return prompt
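
# Example (illustrative):
#   create_character_prompt("Halo {{Char}}!", "Santai", "Ruang tamu", "Sayang", "Kamu")
# returns the prompt:
#   Situasi: Santai
#   Latar: Ruang tamu
#   Kamu: Halo Sayang!
#   Sayang: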
def enhance_character_response(response: str, char_name: str, user_name: str, situation: str, user_input: str) -> str:
    """Enhance a raw model response with character-AI styling."""
    response = response.strip()

    # Remove duplicated name prefixes (names are escaped in case they
    # contain regex metacharacters)
    response = re.sub(f'^{re.escape(char_name)}[:.]?\\s*', '', response, flags=re.IGNORECASE)
    response = re.sub(f'^{re.escape(user_name)}[:.]?\\s*', '', response, flags=re.IGNORECASE)
    response = re.sub(r'^(iya|ya|oh|hmm|tentu|baik)[:.]?\s*', '', response, flags=re.IGNORECASE)

    # Determine the response style from the situation and the user's input
    situation_lower = situation.lower()
    input_lower = user_input.lower()
    if any(word in situation_lower for word in ["romantis", "sayang", "cinta"]) or any(word in input_lower for word in ["sayang", "cinta", "peluk"]):
        templates = CHARACTER_TEMPLATES["romantic"]
        context_key = "romantic"
    elif any(word in situation_lower for word in ["santai", "tenang", "rileks"]):
        templates = CHARACTER_TEMPLATES["casual"]
        context_key = "casual"
    elif any(word in input_lower for word in ["baik", "sehat", "aman", "nyaman"]):
        templates = CHARACTER_TEMPLATES["caring"]
        context_key = "caring"
    else:
        templates = CHARACTER_TEMPLATES["friendly"]
        context_key = "friendly"

    # Generate a templated response when the model output is empty or too short
    if not response or len(response.strip()) < 5:
        # Extract a context word from the user input
        context_words = ["indah", "bagus", "cantik", "keren", "seru", "asik", "enak", "nyaman"]
        found_context = next((word for word in context_words if word in input_lower), "menyenangkan")
        template = random.choice(templates)
        response = template.format(context=f"memang {found_context} sekali")
    else:
        # Otherwise prepend a natural conversation starter if one is missing
        if not any(starter in response.lower() for starter in ["iya", "ya", "hmm", "oh", "tentu", "benar"]):
            starters = ["iya", "ya", "hmm", "oh"] if context_key in ["romantic", "caring"] else ["iya", "wah", "bener"]
            response = f"{random.choice(starters)}, {response}"

    # Add a natural ending based on the context
    if not any(punct in response for punct in ['.', '!', '?']):
        if context_key == "romantic":
            endings = ["sayang.", "ya.", "kan?", "ya sayang?"]
        elif context_key == "caring":
            endings = ["ya.", "kan?", "kok?", "deh."]
        else:
            endings = ["!", "deh!", "ya!", "kan!"]
        response += random.choice(endings)

    # Limit response length for CPU optimization
    if len(response) > 120:
        sentences = response.split('.')
        if len(sentences) > 1:
            response = sentences[0] + '.'
        else:
            response = response[:117] + "..."

    return response
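
# Example (illustrative): with an empty model response, user input
# "tempat ini indah ya" and situation "Santai", the casual branch fires and a
# template is filled with context "memang indah sekali", yielding e.g.
#   "iya, memang indah sekali. Suasananya memang enak banget."
# (exact wording varies because templates are picked with random.choice).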
# CPU-optimized startup (registered as a startup hook so that
# app.state.pipelines exists before the first request arrives)
@app.on_event("startup")
async def load_models():
    app.state.pipelines = {}
    app.state.tokenizers = {}

    # Cap CPU threads so inference stays responsive on small instances
    torch.set_num_threads(2)
    os.environ['OMP_NUM_THREADS'] = '2'
    os.environ['MKL_NUM_THREADS'] = '2'
    os.environ['NUMEXPR_NUM_THREADS'] = '2'

    # Point the Hugging Face cache at a writable directory
    os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
    os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
    os.makedirs(os.environ['HF_HOME'], exist_ok=True)

    print("🎭 Character AI Backend - CPU Optimized Ready!")
# Enhanced chat API for character AI (the /chat route is assumed)
@app.post("/chat")
async def chat(request: ChatRequest):
    start_time = time.time()

    try:
        model_id = request.model.lower()
        if model_id not in MODELS:
            model_id = "distil-gpt-2"
        model_config = MODELS[model_id]

        # Lazy loading with CPU optimizations
        if model_id not in app.state.pipelines:
            print(f"🎭 Loading Character Model {model_config['name']}...")
            pipeline_kwargs = {
                "task": model_config["task"],
                "model": model_config["model_path"],
                "device": -1,  # -1 forces CPU
                "torch_dtype": torch.float32,
                "model_kwargs": {
                    "torchscript": False,
                    "low_cpu_mem_usage": True
                }
            }
            app.state.pipelines[model_id] = pipeline(**pipeline_kwargs)
            gc.collect()

        pipe = app.state.pipelines[model_id]
        # Create the character prompt
        char_prompt = create_character_prompt(
            request.message,
            request.situation,
            request.location,
            request.char_name,
            request.user_name
        )

        if model_config["task"] == "text-generation":
            # Generation tuned for short, in-character replies.
            # max_new_tokens budgets only the generated tokens, so the prompt
            # length no longer needs to be estimated by splitting on whitespace.
            result = pipe(
                char_prompt,
                max_new_tokens=min(model_config["max_tokens"], request.max_length // 2),
                temperature=0.8,
                do_sample=True,
                top_p=0.9,
                top_k=50,
                repetition_penalty=1.1,
                pad_token_id=pipe.tokenizer.eos_token_id,
                num_return_sequences=1
            )[0]['generated_text']

            # Keep only the text generated after the prompt
            if char_prompt in result:
                result = result[len(char_prompt):].strip()

            # Clean and enhance the response
            result = enhance_character_response(result, request.char_name, request.user_name, request.situation, request.message)
        elif model_config["task"] == "text-classification":
            # Classification models cannot generate text, so map the
            # classifier's confidence score to a canned emotional reply
            try:
                output = pipe(request.message, truncation=True, max_length=128)[0]
                emotion_score = output['score']

                if emotion_score > 0.8:
                    emotion_responses = [
                        f"iya {request.user_name}, aku merasakan energi positif dari kata-katamu!",
                        f"wah, {request.user_name} terlihat sangat antusias ya!",
                        f"senang banget deh lihat {request.user_name} kayak gini!"
                    ]
                elif emotion_score > 0.6:
                    emotion_responses = [
                        f"hmm, aku bisa merasakan perasaan {request.user_name} nih.",
                        f"ya {request.user_name}, suasana hatimu cukup bagus ya.",
                        f"oke {request.user_name}, kayaknya kamu dalam mood yang baik."
                    ]
                else:
                    emotion_responses = [
                        f"iya {request.user_name}, aku di sini untuk kamu.",
                        f"hmm {request.user_name}, mau cerita lebih lanjut?",
                        f"baiklah {request.user_name}, aku mendengarkan."
                    ]
                result = random.choice(emotion_responses)
            except Exception:
                result = enhance_character_response("", request.char_name, request.user_name, request.situation, request.message)
        elif model_config["task"] == "text2text-generation":
            # For T5-style encoder-decoder models
            try:
                t5_input = f"respond as {request.char_name} in {request.situation}: {request.message}"
                result = pipe(
                    t5_input,
                    max_length=model_config["max_tokens"],
                    do_sample=True,  # sampling so that temperature takes effect
                    temperature=0.7
                )[0]['generated_text']
                result = enhance_character_response(result, request.char_name, request.user_name, request.situation, request.message)
            except Exception:
                result = enhance_character_response("", request.char_name, request.user_name, request.situation, request.message)
        # Final validation
        if not result or len(result.strip()) < 3:
            result = enhance_character_response("", request.char_name, request.user_name, request.situation, request.message)

        processing_time = round((time.time() - start_time) * 1000)

        return {
            "response": result,
            "model": model_config["name"],
            "status": "success",
            "processing_time": f"{processing_time}ms",
            "character": request.char_name,
            "situation": request.situation,
            "location": request.location
        }

    except Exception as e:
        print(f"❌ Character AI Error: {e}")
        processing_time = round((time.time() - start_time) * 1000)

        # Fallback character responses
        fallback_responses = [
            f"maaf {request.user_name}, aku sedang bingung. Bisa ulangi lagi?",
            f"hmm {request.user_name}, kayaknya aku butuh waktu sebentar untuk berpikir.",
            f"ya {request.user_name}, coba pakai kata yang lebih sederhana?",
            f"iya {request.user_name}, aku masih belajar nih. Sabar ya."
        ]
        fallback = random.choice(fallback_responses)

        return {
            "response": fallback,
            "status": "error",
            "processing_time": f"{processing_time}ms",
            "character": request.char_name
        }
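
# Example success payload (illustrative values):
#   {"response": "iya sayang, memang indah sekali. ...",
#    "model": "DistilGPT-2 ⚡", "status": "success",
#    "processing_time": "420ms", "character": "Sayang",
#    "situation": "Santai", "location": "Ruang tamu"}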
# Health check endpoint (the /health route is assumed)
@app.get("/health")
async def health():
    loaded_models = len(app.state.pipelines) if hasattr(app.state, 'pipelines') else 0
    return {
        "status": "healthy",
        "platform": "CPU",
        "loaded_models": loaded_models,
        "total_models": len(MODELS),
        "optimization": "Character AI CPU-Tuned",
        "backend_version": "1.0.0"
    }
# Model info endpoint (the /models route is assumed)
@app.get("/models")
async def get_models():
    return {
        "models": [
            {
                "id": k,
                "name": v["name"],
                "task": v["task"],
                "max_tokens": v["max_tokens"],
                "priority": v["priority"],
                "cpu_optimized": True,
                "character_ai_ready": True
            }
            for k, v in MODELS.items()
        ],
        "platform": "CPU",
        "recommended_for_roleplay": ["distil-gpt-2", "gpt-2", "gpt-neo", "tinny-llama"],
        "recommended_for_analysis": ["bert-tinny", "distilbert-base-uncased", "albert-base-v2"]
    }
# Configuration endpoint (the /config route is assumed)
@app.get("/config")
async def get_config():
    return {
        "default_situation": "Santai",
        "default_location": "Ruang tamu",
        "default_char_name": "Sayang",
        "default_user_name": "Kamu",
        "max_response_length": 300,
        "min_response_length": 50,
        "supported_languages": ["id", "en"],
        "character_templates": list(CHARACTER_TEMPLATES.keys())
    }
# Inference endpoint for compatibility (the /inference route is assumed)
@app.post("/inference")
async def inference(request: dict):
    """CPU-optimized inference endpoint for backward compatibility."""
    try:
        message = request.get("message", "")
        model_path = request.get("model", "Lyon28/Distil_GPT-2")

        # Map a full model path like "Lyon28/Distil_GPT-2" to an internal key
        # ("distil-gpt-2"); unknown models fall back to the default
        model_key = model_path.split("/")[-1].lower().replace("_", "-")
        internal_model = model_key if model_key in MODELS else "distil-gpt-2"

        # Build an internal request and reuse the main chat handler
        chat_request = ChatRequest(
            message=message,
            model=internal_model,
            situation=request.get("situation", "Santai"),
            location=request.get("location", "Ruang tamu"),
            char_name=request.get("char_name", "Sayang"),
            user_name=request.get("user_name", "Kamu")
        )

        result = await chat(chat_request)

        return {
            "result": result["response"],
            "status": "success",
            "model_used": result.get("model", "unknown"),
            "processing_time": result.get("processing_time", "0ms"),
            "character_info": {
                "name": result.get("character", "Character"),
                "situation": result.get("situation", "Unknown"),
                "location": result.get("location", "Unknown")
            }
        }
    except Exception as e:
        print(f"❌ Inference Error: {e}")
        return {
            "result": "🎭 Character sedang bersiap, coba lagi sebentar...",
            "status": "error"
        }
# Run with CPU optimizations
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
        log_level="info",
        workers=1,
        timeout_keep_alive=30,
        access_log=False
    )
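
# Quick smoke test (assuming the route decorators above and the default port):
#   curl http://localhost:7860/health
#   curl -X POST http://localhost:7860/chat \
#     -H "Content-Type: application/json" \
#     -d '{"message": "Halo sayang", "model": "distil-gpt-2"}'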