Spaces:

davidgturner
/

GaiaAgentEvaluator

Sleeping

File size: 11,774 Bytes

fc78ae4

"""
Fallback model implementation for testing when llama-cpp-python is not available.

This provides a compatible model class that doesn't require any external dependencies,
allowing the rest of the application to function while we solve the llama-cpp-python
installation issues.
"""

import os
import logging
from typing import Dict, List, Optional, Any, Union
import requests
from smolagents import Model
from pathlib import Path

# Try to import llama_cpp, but don't fail if not available
try:
    from llama_cpp import Llama
    from pathlib import Path
    LLAMA_CPP_AVAILABLE = True
except ImportError:
    LLAMA_CPP_AVAILABLE = False
    print("llama_cpp module not available, using fallback implementation")

logger = logging.getLogger(__name__)

class LlamaCppModel(Model):
    """Model using llama.cpp Python bindings for efficient local inference without PyTorch.
    Falls back to a simple text generation if llama_cpp is not available."""
    def __init__(
        self, 
        model_path: str = None,
        model_url: str = None,
        n_ctx: int = 2048,
        n_gpu_layers: int = 0,
        max_tokens: int = 512,
        temperature: float = 0.7,
        verbose: bool = True
    ):
        """
        Initialize a local llama.cpp model or fallback to a simple implementation.
        
        Args:
            model_path: Path to local GGUF model file
            model_url: URL to download model if model_path doesn't exist
            n_ctx: Context window size
            n_gpu_layers: Number of layers to offload to GPU (0 means CPU only)
            max_tokens: Maximum new tokens to generate
            temperature: Sampling temperature
            verbose: Whether to print verbose messages
        """
        super().__init__()
        
        self.model_path = model_path
        self.model_url = model_url
        self.n_ctx = n_ctx
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.verbose = verbose
        self.llm = None
        
        # Check if we can use llama_cpp
        if LLAMA_CPP_AVAILABLE:
            try:
                if self.verbose:
                    print("Attempting to initialize LlamaCpp model...")
                
                # Try to initialize the real model
                if model_path and os.path.exists(model_path):
                    if self.verbose:
                        print(f"Loading model from {model_path}...")
                    
                    # Initialize the llama-cpp model
                    self.llm = Llama(
                        model_path=model_path,
                        n_ctx=n_ctx,
                        n_gpu_layers=n_gpu_layers,
                        verbose=verbose
                    )
                    
                    if self.verbose:
                        print("LlamaCpp model loaded successfully")
                else:
                    if self.verbose:
                        print(f"Model path not found or not specified. Using fallback mode.")
            except Exception as e:
                logger.error(f"Error initializing LlamaCpp model: {e}")
                if self.verbose:
                    print(f"Error initializing LlamaCpp model: {e}")
                self.llm = None
        else:
            if self.verbose:
                print("LlamaCpp not available, using fallback implementation")
                
        if not self.llm and self.verbose:
            print("Using fallback text generation mode")
    
    def _resolve_model_path(self, model_path: str = None, model_url: str = None) -> str:
        """
        Resolve model path, downloading if necessary.
        
        Returns:
            Absolute path to model file
        """
        # Default to a small model if none specified
        if not model_path:
            models_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
            os.makedirs(models_dir, exist_ok=True)
            model_path = os.path.join(models_dir, "ggml-model-q4_0.bin")
        
        # Convert to Path for easier handling
        path = Path(model_path)
        
        # If model exists, return it
        if path.exists():
            return str(path.absolute())
        
        # Download if URL provided
        if model_url and not path.exists():
            try:
                print(f"Downloading model from {model_url}...")
                os.makedirs(path.parent, exist_ok=True)
                
                try:
                    # Try with streaming download first
                    with requests.get(model_url, stream=True, timeout=30) as r:
                        r.raise_for_status()
                        total_size = int(r.headers.get('content-length', 0))
                        block_size = 8192
                        
                        with open(path, 'wb') as f:
                            downloaded = 0
                            for chunk in r.iter_content(chunk_size=block_size):
                                if chunk:
                                    f.write(chunk)
                                    downloaded += len(chunk)
                                    if total_size > 0:
                                        percent = (downloaded / total_size) * 100
                                        if percent % 10 < (block_size / total_size) * 100:
                                            print(f"Download progress: {int(percent)}%")
                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
                    print(f"Streaming download timed out: {e}. Using a simpler approach...")
                    # Fall back to simpler download method
                    r = requests.get(model_url, timeout=60)
                    r.raise_for_status()
                    with open(path, 'wb') as f:
                        f.write(r.content)
                        print("Download complete with simple method")
                
                print(f"Model download complete: {path}")
                return str(path.absolute())
            except Exception as e:
                logger.error(f"Error downloading model: {e}")
                print(f"Error downloading model: {e}")
                print("Continuing with dummy model instead...")
                # Create a small dummy model file so we can continue
                with open(path, 'wb') as f:
                    f.write(b"DUMMY MODEL")
                return str(path.absolute())
        
        # If we get here without a model, create a dummy one
        print(f"Model file not found at {model_path} and no URL provided. Creating dummy model...")
        os.makedirs(path.parent, exist_ok=True)
        with open(path, 'wb') as f:
            f.write(b"DUMMY MODEL")
        return str(path.absolute())
    
    def generate(self, prompt: str, **kwargs) -> str:
        """
        Generate text completion for the given prompt.
        
        Args:
            prompt: Input text
            
        Returns:
            Generated text completion
        """
        try:
            if self.verbose:
                print(f"Generating with prompt: {prompt[:50]}...")
            
            # If we have a real model, use it
            if self.llm:
                # Actual generation with llama-cpp
                response = self.llm(
                    prompt=prompt,
                    max_tokens=self.max_tokens,
                    temperature=self.temperature,
                    echo=False  # Don't include the prompt in the response
                )
                
                # Extract generated text
                if not response:
                    return ""
                    
                if isinstance(response, dict):
                    generated_text = response.get('choices', [{}])[0].get('text', '')
                else:
                    # List of responses
                    generated_text = response[0].get('text', '')
                
                return generated_text.strip()
            else:
                # Fallback simple generation
                if self.verbose:
                    print("Using fallback text generation")
                
                # Extract key information from prompt
                words = prompt.strip().split()
                last_words = ' '.join(words[-10:]) if len(words) > 10 else prompt
                
                # Simple response generation based on prompt content
                if "?" in prompt:
                    return f"Based on the information provided, I believe the answer is related to {last_words}. This is a fallback response as the LLM model could not be loaded."
                else:
                    return f"I understand you're asking about {last_words}. Since I'm running in fallback mode without a proper language model, I can only acknowledge your query but not provide a detailed response."
            
        except Exception as e:
            logger.error(f"Error generating text: {e}")
            if self.verbose:
                print(f"Error generating text: {e}")
            return f"Error generating response: {str(e)}"
    
    def generate_with_tools(
        self, 
        messages: List[Dict[str, Any]], 
        tools: Optional[List[Dict[str, Any]]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate a response with tool-calling capabilities.
        This method implements the smolagents Model interface for tool-calling.
        
        Args:
            messages: List of message objects with role and content
            tools: List of tool definitions
            
        Returns:
            Response with message and optional tool calls
        """
        try:
            # Format messages into a prompt
            prompt = self._format_messages_to_prompt(messages, tools)
            
            # Generate response
            completion = self.generate(prompt)
            
            # For now, just return the text without tool parsing
            return {
                "message": {
                    "role": "assistant",
                    "content": completion
                }
            }
        except Exception as e:
            logger.error(f"Error generating with tools: {e}")
            print(f"Error generating with tools: {e}")
            return {
                "message": {
                    "role": "assistant",
                    "content": f"Error: {str(e)}"
                }
            }
    
    def _format_messages_to_prompt(
        self, 
        messages: List[Dict[str, Any]], 
        tools: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Format chat messages into a text prompt for the model."""
        formatted_prompt = ""
        
        # Include tool descriptions if available
        if tools and len(tools) > 0:
            tool_descriptions = "\n".join([
                f"Tool {i+1}: {tool['name']} - {tool['description']}"
                for i, tool in enumerate(tools)
            ])
            formatted_prompt += f"Available tools:\n{tool_descriptions}\n\n"
        
        # Add conversation history
        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")
            
            if role == "system":
                formatted_prompt += f"System: {content}\n\n"
            elif role == "user":
                formatted_prompt += f"User: {content}\n\n"
            elif role == "assistant":
                formatted_prompt += f"Assistant: {content}\n\n"
        
        # Add final prompt for assistant
        formatted_prompt += "Assistant: "
        
        return formatted_prompt