# NOTE: The lines below are artifacts of the hosting page (Hugging Face
# Spaces status banner, file size, commit hashes, line-number gutter),
# preserved here as comments; they are not part of the source code.
#   Spaces: Runtime error
#   File size: 8,797 Bytes
#   Commits: 68964c2 e8c6de7 899c2a1 f13a869
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, Request
from pydantic import BaseModel
from datetime import datetime, timezone
from app.core.device_setup import device
from app.core.message_saving import save_message
from app.core.past_conversations import get_past_conversations
from app.core.memory_management import reset_memory, get_last_interaction, update_last_interaction
from app.core.fact_management import get_user_fact, save_user_fact
from app.core.conversation_retrieval import get_similar_conversations
from app.core.feedback_management import apply_feedback_adjustments
from app.core.logging_setup import logger
from app.core.prompts import SYSTEM_PROMPT
from app.core.interaction_trends import get_time_of_day
from app.core.search_utils import needs_web_search, search_duckduckgo
import os
import asyncio
logger.info("Logger imported successfully in chat_hf.py")
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN", "")
if not HUGGINGFACE_TOKEN:
raise ValueError("β Hugging Face Token (HF_TOKEN) has not been set yet")
headers = {
"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"
}
# Replace local model loading with Gemini API integration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
raise ValueError("β Gemini API Key (GEMINI_API_KEY) has not been set yet")
def query_gemini_api(prompt: str) -> str:
import requests
url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"
headers = {"Content-Type": "application/json"}
payload = {
"prompt": {"text": prompt},
"temperature": 0.7,
"maxOutputTokens": 256
}
try:
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
return data.get("candidates", [{}])[0].get("output", "")
except requests.exceptions.RequestException as e:
logger.error(f"π¨ Error querying Gemini API: {e}, Response: {response.text if 'response' in locals() else 'No response'}")
return "β οΈ An error occurred while generating a response."
# Replace generate_response and query_huggingface with query_gemini_api
def generate_response(prompt_text):
return query_gemini_api(prompt_text)
# Ensure query_huggingface uses the Gemini API
def query_huggingface(prompt: str) -> str:
return query_gemini_api(prompt)
def build_clean_prompt(messages):
"""Construct a clean prompt for the AI model."""
role_map = {
"system": "System",
"user": "User",
"assistant": "Arina"
}
prompt = ""
for msg in messages:
role = role_map.get(msg["role"], "User")
prompt += f"{role}: {msg['content'].strip()}\n"
prompt += "Arina:"
return prompt
router = APIRouter()
class ChatRequest(BaseModel):
message: str
@router.post("/chat")
async def chat_with_arina(request: Request, request_body: ChatRequest):
user_input = request_body.message.strip()
logger.info(f"π© User input: {user_input}")
logger.info("Logger is accessible in the /chat endpoint.")
# Handle shutdown command
if user_input.lower() == "arina, shutdown.":
logger.info("π Shutdown command received. Shutting down the server.")
# Trigger a graceful shutdown
async def shutdown():
logger.info("π Initiating server shutdown.")
request.app.state.should_exit = True # Signal uvicorn to exit
asyncio.create_task(shutdown())
os._exit(0) # Forcefully exit the entire system
return {"response": "π Server is shutting down."}
if 'logger' not in globals():
print("DEBUG: Logger is not accessible in the /chat endpoint.")
# Check if the user's query requires a web search
if needs_web_search(user_input):
logger.info(f"π Web search triggered for: {user_input}")
search_summary, search_links = search_duckduckgo(user_input)
search_context = f"I found the following information: {search_summary}"
if search_links:
search_context += f" (Related links: {', '.join(search_links)})"
dynamic_prompt = (
f"User asked: {user_input}\n"
f"{search_context}\n"
f"Based on this, please provide a natural, conversational response "
f"that integrates this information without listing out links verbatim."
)
# Initialize messages
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
messages = apply_feedback_adjustments(messages) # Optional: Apply feedback adjustments
logger.info("π§ Loading for web search response")
try:
prompt_text = "\n".join([msg["content"] for msg in messages])
arina_reply = query_huggingface(prompt_text).strip()
if not arina_reply:
arina_reply = "I'm not sure how to respond to that, but I'm here to help."
except Exception as e:
logger.error(f"π¨ Error connecting to web search: {e}")
arina_reply = "β οΈ Arina is having trouble responding. Try again."
return {"response": arina_reply}
# Handle reset command
if user_input.lower() == "arina, reset.":
reset_memory()
return {"response": "β
Memory wiped."}
# Retrieve user-specific data
user_name = get_user_fact("name")
# Retrieve past relevant conversations
history = get_past_conversations(limit=3)
formatted_history = [{"role": role, "content": msg} for _, role, msg, _ in history]
relevant_history = get_similar_conversations(user_input, top_n=3)
formatted_relevant_history = [{"role": "system", "content": f"Previously, the user discussed: {msg}"} for msg in relevant_history]
# Generate time-aware context
last_interaction = get_last_interaction()
current_time_of_day = get_time_of_day()
most_active_time = get_user_fact("most_active_time") or "unknown"
time_context = f"Be aware that it is {current_time_of_day}. Adjust the conversation naturally based on this."
if most_active_time != "unknown":
time_context += f" The user is usually active in the {most_active_time}. Adjust your tone accordingly."
if last_interaction:
# Ensure current datetime is timezone-aware
time_gap = (datetime.now(timezone.utc) - last_interaction).total_seconds()
if time_gap > 86400:
time_context += " The user has returned after a long time. Let them feel welcomed without explicitly mentioning the gap."
elif time_gap > 43200:
time_context += f" Since it is {current_time_of_day}, ensure your response flows accordingly."
elif time_gap > 18000:
time_context += f" Adapt the conversation for a {current_time_of_day} chat naturally."
else:
time_context += " The conversation is active; keep it engaging."
# Construct messages for the AI model
system_prompt_adjusted = apply_feedback_adjustments([{"role": "system", "content": SYSTEM_PROMPT}])[0]["content"]
messages = [{"role": "system", "content": system_prompt_adjusted + "\n\n" + time_context}]
messages.extend(formatted_history)
messages.extend(formatted_relevant_history)
messages.append({"role": "user", "content": user_input})
# Call the AI model
logger.info("π§ Loading for general chat response")
try:
prompt_text = build_clean_prompt(messages)
arina_reply = query_huggingface(prompt_text).strip()
if not arina_reply:
logger.warning("β οΈ Empty response!")
arina_reply = "π€ I'm not sure how to respond to that."
except Exception as e:
logger.error(f"π¨ Error connecting: {e}")
arina_reply = "β οΈ Arina is having trouble responding. Try again."
# Save conversation to memory
try:
save_message(datetime.now(timezone.utc), "global_chat", "user", user_input)
save_message(datetime.now(timezone.utc), "global_chat", "assistant", arina_reply)
update_last_interaction()
# Update most active time based on latest interaction
current_hour = datetime.now(timezone.utc).hour
if 6 <= current_hour < 12:
save_user_fact("most_active_time", "morning")
elif 12 <= current_hour < 18:
save_user_fact("most_active_time", "afternoon")
elif 18 <= current_hour < 24:
save_user_fact("most_active_time", "evening")
else:
save_user_fact("most_active_time", "night")
except Exception as e:
logger.error(f"π¨ Error saving message to database: {e}")
logger.info(f"π¬ Arina's reply: {arina_reply}")
return {"response": arina_reply} |