cleanup and harmonization
Files changed:
- .gitignore       +1   -0
- Dockerfile       +2   -1
- README.md        +8   -19
- app/main.py      +2   -88
- app/prompt.py    +0   -7
- app/utils.py     +178 -0
- params.cfg       +35  -0
- requirements.txt +19  -5
.gitignore
ADDED

@@ -0,0 +1 @@
+.DS_Store
Dockerfile
CHANGED

@@ -13,7 +13,8 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 # -------- copy source --------
 COPY app ./app
-COPY
+COPY params.cfg .
+COPY .env* ./
 
 # Ports:
 # • 7860 → Gradio UI (HF Spaces standard)
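The new `COPY .env* ./` line bakes any local `.env` file into the image so that `load_dotenv()` in app/utils.py can pick up provider keys at runtime (on HF Spaces the same variables would normally arrive as secrets). A minimal sketch of a check script (hypothetical, not part of this commit) that verifies the expected keys are visible inside the container:

```python
# check_env.py (hypothetical helper, not part of this commit)
import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file copied in by `COPY .env* ./`

# Env-var names mirror those expected by app/utils.py
KEY_VARS = {
    "openai": "OPENAI_API_KEY",
    "anthropic": "ANTHROPIC_API_KEY",
    "cohere": "COHERE_API_KEY",
    "huggingface": "HF_TOKEN",
}

for provider, var in KEY_VARS.items():
    print(f"{provider:12s} {var:18s} {'set' if os.getenv(var) else 'missing'}")
```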
README.md
CHANGED

@@ -8,27 +8,16 @@ pinned: false
 license: mit
 ---
 
-#
-
-This is
-
-## How to use
-
-1. Enter your question in the "Query" field
-2. Paste relevant documents or context in the "Context" field
-3. Click submit to get an AI-generated answer based on your context
-
-## Features
-
-- Uses state-of-the-art language models via Hugging Face Inference API
-- Supports multiple model providers
-- Clean, intuitive interface
-- Example queries to get started
+# Generation Module
+
+This is an LLM-based generation service designed to be deployed as a modular component of a broader RAG system. The service runs in a Docker container and exposes a Gradio UI on port 7860 as well as an MCP endpoint.
 
 ## Configuration
 
-
-
-
+1. The module requires an API key (set as an environment variable) for an inference provider to run. Multiple inference providers are supported. Make sure to set the appropriate environment variable:
+   - OpenAI: `OPENAI_API_KEY`
+   - Anthropic: `ANTHROPIC_API_KEY`
+   - Cohere: `COHERE_API_KEY`
+   - HuggingFace: `HF_TOKEN`
 
-
+2. Inference provider and model settings are configured via `params.cfg`.
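Beyond the UI, the Gradio app can be called programmatically. A hedged sketch using `gradio_client`; the endpoint name `/rag_generate` is an assumption based on the function exposed in app/main.py, so check the running app's API page for the actual name:

```python
from gradio_client import Client

# Assumes the service is running locally on the standard Spaces port
client = Client("http://localhost:7860/")
answer = client.predict(
    "Who is the CEO?",  # query
    "Company ABC was founded in 2020. The current CEO is Jane Smith.",  # context
    api_name="/rag_generate",  # assumed endpoint name
)
print(answer)
```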
app/main.py
CHANGED

@@ -1,81 +1,5 @@
-import os, asyncio, logging
 import gradio as gr
-from huggingface_hub import InferenceClient
-from .prompt import build_prompt
-
-# ---------------------------------------------------------------------
-# model / client initialisation
-# ---------------------------------------------------------------------
-HF_TOKEN = os.getenv("HF_TOKEN")
-MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
-MAX_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
-TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
-
-if not HF_TOKEN:
-    raise RuntimeError(
-        "HF_TOKEN env-var missing. "
-    )
-
-client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
-
-# ---------------------------------------------------------------------
-# Core generation function for both Gradio UI and MCP
-# ---------------------------------------------------------------------
-async def _call_llm(prompt: str) -> str:
-    """
-    Try text_generation first (for models/providers that still support it);
-    fall back to chat_completion when the provider is chat-only (Novita, etc.).
-    """
-    try:
-        # hf-inference
-        return await asyncio.to_thread(
-            client.text_generation,
-            prompt,
-            max_new_tokens=MAX_TOKENS,
-            temperature=TEMPERATURE,
-        )
-    except ValueError as e:
-        if "Supported task: conversational" not in str(e):
-            raise  # genuine error → bubble up
-
-        # fallback for Novita
-        messages = [{"role": "user", "content": prompt}]
-        completion = await asyncio.to_thread(
-            client.chat_completion,
-            messages=messages,
-            model=MODEL_ID,
-            max_tokens=MAX_TOKENS,
-            temperature=TEMPERATURE,
-        )
-        return completion.choices[0].message.content.strip()
-
-async def rag_generate(query: str, context: str) -> str:
-    """
-    Generate an answer to a query using provided context through RAG.
-
-    This function takes a user query and relevant context, then uses a language model
-    to generate a comprehensive answer based on the provided information.
-
-    Args:
-        query (str): The user's question or query
-        context (str): The relevant context/documents to use for answering
-
-    Returns:
-        str: The generated answer based on the query and context
-    """
-    if not query.strip():
-        return "Error: Query cannot be empty"
-
-    if not context.strip():
-        return "Error: Context cannot be empty"
-
-    prompt = build_prompt(query, context)
-    try:
-        answer = await _call_llm(prompt)
-        return answer
-    except Exception as e:
-        logging.exception("Generation failed")
-        return f"Error: {str(e)}"
+from .utils import rag_generate
 
 # ---------------------------------------------------------------------
 # Gradio Interface with MCP support

@@ -102,17 +26,7 @@ ui = gr.Interface(
         show_copy_button=True
     ),
     title="RAG Generation Service",
-    description="Ask questions
-    examples=[
-        [
-            "What is the main benefit mentioned?",
-            "Machine learning has revolutionized many industries. The main benefit is increased efficiency and accuracy in data processing."
-        ],
-        [
-            "Who is the CEO?",
-            "Company ABC was founded in 2020. The current CEO is Jane Smith, who has led the company to significant growth."
-        ]
-    ]
+    description="Ask questions based on provided context. Intended for use in RAG pipelines (i.e. context supplied by a semantic retriever service) as an MCP server.",
 )
 
 # Launch with MCP server enabled
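The launch call itself falls outside the hunks shown above; a hedged sketch of how the slimmed-down main.py plausibly ends, where `mcp_server=True` is the gradio[mcp] switch that exposes `rag_generate` as an MCP tool alongside the UI (the exact arguments are an assumption, not shown in the diff):

```python
# Sketch of the tail of app/main.py; arguments are assumptions
ui.launch(
    server_name="0.0.0.0",  # reachable from outside the container
    server_port=7860,       # HF Spaces standard port
    mcp_server=True,        # also serve the function as an MCP endpoint
)
```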
app/prompt.py
DELETED

@@ -1,7 +0,0 @@
-def build_prompt(question: str, context: str) -> str:
-    return (
-        "You are an expert assistant. Answer the USER question using only the "
-        "CONTEXT provided. If the context is insufficient say 'I don't know.'.\n\n"
-        f"### CONTEXT\n{context}\n\n"
-        f"### USER QUESTION\n{question}\n\n### ASSISTANT ANSWER\n"
-    )
app/utils.py
ADDED

@@ -0,0 +1,178 @@
+import os
+import logging
+import configparser
+from dotenv import load_dotenv
+
+# LangChain imports
+from langchain_openai import ChatOpenAI
+from langchain_anthropic import ChatAnthropic
+from langchain_cohere import ChatCohere
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+from langchain_core.messages import SystemMessage, HumanMessage
+
+# Local .env file
+load_dotenv()
+
+def getconfig(configfile_path: str):
+    """
+    Read the config file.
+
+    Params
+    ----------------
+    configfile_path: file path of .cfg file
+    """
+    config = configparser.ConfigParser()
+    try:
+        with open(configfile_path) as f:
+            config.read_file(f)
+        return config
+    except FileNotFoundError:
+        logging.warning("config file not found")
+
+# ---------------------------------------------------------------------
+# Provider-agnostic authentication and configuration
+# ---------------------------------------------------------------------
+def get_auth_config(provider: str) -> dict:
+    """Get authentication configuration for different providers"""
+    auth_configs = {
+        "openai": {"api_key": os.getenv("OPENAI_API_KEY")},
+        "huggingface": {"api_key": os.getenv("HF_TOKEN")},
+        "anthropic": {"api_key": os.getenv("ANTHROPIC_API_KEY")},
+        "cohere": {"api_key": os.getenv("COHERE_API_KEY")},
+    }
+
+    if provider not in auth_configs:
+        raise ValueError(f"Unsupported provider: {provider}")
+
+    auth_config = auth_configs[provider]
+    api_key = auth_config.get("api_key")
+
+    if not api_key:
+        raise RuntimeError(f"Missing API key for provider '{provider}'. Please set the appropriate environment variable.")
+
+    return auth_config
+
+# ---------------------------------------------------------------------
+# Model / client initialization
+# ---------------------------------------------------------------------
+config = getconfig("params.cfg")
+
+PROVIDER = config.get("generator", "PROVIDER")
+MODEL = config.get("generator", "MODEL")
+MAX_TOKENS = int(config.get("generator", "MAX_TOKENS"))
+TEMPERATURE = float(config.get("generator", "TEMPERATURE"))
+
+# Set up authentication for the selected provider
+auth_config = get_auth_config(PROVIDER)
+
+def get_chat_model():
+    """Initialize the appropriate LangChain chat model based on provider"""
+    common_params = {
+        "temperature": TEMPERATURE,
+        "max_tokens": MAX_TOKENS,
+    }
+
+    if PROVIDER == "openai":
+        return ChatOpenAI(
+            model=MODEL,
+            openai_api_key=auth_config["api_key"],
+            **common_params
+        )
+    elif PROVIDER == "anthropic":
+        return ChatAnthropic(
+            model=MODEL,
+            anthropic_api_key=auth_config["api_key"],
+            **common_params
+        )
+    elif PROVIDER == "cohere":
+        return ChatCohere(
+            model=MODEL,
+            cohere_api_key=auth_config["api_key"],
+            **common_params
+        )
+    elif PROVIDER == "huggingface":
+        # Initialize HuggingFaceEndpoint with explicit parameters
+        llm = HuggingFaceEndpoint(
+            repo_id=MODEL,
+            huggingfacehub_api_token=auth_config["api_key"],
+            task="text-generation",
+            temperature=TEMPERATURE,
+            max_new_tokens=MAX_TOKENS
+        )
+        return ChatHuggingFace(llm=llm)
+    else:
+        raise ValueError(f"Unsupported provider: {PROVIDER}")
+
+# Initialize provider-agnostic chat model
+chat_model = get_chat_model()
+
+# ---------------------------------------------------------------------
+# Core generation function for both Gradio UI and MCP
+# ---------------------------------------------------------------------
+async def _call_llm(messages: list) -> str:
+    """
+    Provider-agnostic LLM call using LangChain.
+
+    Args:
+        messages: List of LangChain message objects
+
+    Returns:
+        Generated response content as string
+    """
+    try:
+        # Use async invoke for better performance
+        response = await chat_model.ainvoke(messages)
+        return response.content.strip()
+    except Exception as e:
+        logging.exception(f"LLM generation failed with provider '{PROVIDER}' and model '{MODEL}': {e}")
+        raise
+
+def build_messages(question: str, context: str) -> list:
+    """
+    Build messages in LangChain format.
+
+    Args:
+        question: The user's question
+        context: The relevant context for answering
+
+    Returns:
+        List of LangChain message objects
+    """
+    system_content = (
+        "You are an expert assistant. Answer the USER question using only the "
+        "CONTEXT provided. If the context is insufficient say 'I don't know.'"
+    )
+
+    user_content = f"### CONTEXT\n{context}\n\n### USER QUESTION\n{question}"
+
+    return [
+        SystemMessage(content=system_content),
+        HumanMessage(content=user_content)
+    ]
+
+
+async def rag_generate(query: str, context: str) -> str:
+    """
+    Generate an answer to a query using provided context through RAG.
+
+    This function takes a user query and relevant context, then uses a language model
+    to generate a comprehensive answer based on the provided information.
+
+    Args:
+        query (str): The user's question or query
+        context (str): The relevant context/documents to use for answering
+
+    Returns:
+        str: The generated answer based on the query and context
+    """
+    if not query.strip():
+        return "Error: Query cannot be empty"
+
+    if not context.strip():
+        return "Error: Context cannot be empty"
+
+    try:
+        messages = build_messages(query, context)
+        answer = await _call_llm(messages)
+        return answer
+    except Exception as e:
+        logging.exception("Generation failed")
+        return f"Error: {str(e)}"
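A quick smoke test for the new module (a sketch, assuming `params.cfg` and the matching API key are in place; the example strings are taken from the examples removed from app/main.py). `rag_generate` is a coroutine, so it needs an event loop:

```python
import asyncio
from app.utils import rag_generate  # module-level code loads config and the chat model

answer = asyncio.run(rag_generate(
    query="What is the main benefit mentioned?",
    context="Machine learning has revolutionized many industries. The main "
            "benefit is increased efficiency and accuracy in data processing.",
))
print(answer)
```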
params.cfg
ADDED

@@ -0,0 +1,35 @@
+[generator]
+PROVIDER = huggingface
+MODEL = meta-llama/Meta-Llama-3-8B-Instruct
+MAX_TOKENS = 512
+TEMPERATURE = 0.2
+
+## OpenAI
+# [generator]
+# PROVIDER = openai
+# MODEL = gpt-4o
+# MAX_TOKENS = 512
+# TEMPERATURE = 0.2
+
+## Anthropic
+# [generator]
+# PROVIDER = anthropic
+# MODEL = claude-3-haiku-20240307
+# MAX_TOKENS = 512
+# TEMPERATURE = 0.2
+
+## Cohere
+# [generator]
+# PROVIDER = cohere
+# MODEL = command
+# MAX_TOKENS = 512
+# TEMPERATURE = 0.2
+
+
+## Environment Variables Required
+
+# Make sure to set the appropriate environment variables:
+# - OpenAI: `OPENAI_API_KEY`
+# - Anthropic: `ANTHROPIC_API_KEY`
+# - Cohere: `COHERE_API_KEY`
+# - HuggingFace: `HF_TOKEN`
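Switching providers means swapping the active `[generator]` block and exporting the matching key. A minimal sanity check (hypothetical, mirroring how app/utils.py reads the file) that the config parses and names a supported provider:

```python
import configparser

cfg = configparser.ConfigParser()
cfg.read("params.cfg")

provider = cfg.get("generator", "PROVIDER")
# Providers supported by get_chat_model() in app/utils.py
assert provider in {"openai", "anthropic", "cohere", "huggingface"}, provider
print(provider, cfg.get("generator", "MODEL"))
```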
requirements.txt
CHANGED

@@ -1,5 +1,19 @@
-
-gradio
-
-
-
+# Core dependencies
+gradio>=4.0.0
+gradio[mcp]
+python-dotenv>=1.0.0
+
+# LangChain core
+langchain-core>=0.1.0
+langchain-community>=0.0.1
+
+# Provider-specific LangChain packages
+langchain-openai>=0.1.0
+langchain-anthropic>=0.1.0
+langchain-cohere>=0.1.0
+langchain-together>=0.1.0
+langchain-huggingface>=0.0.1
+
+# Additional dependencies that might be needed
+requests>=2.31.0
+pydantic>=2.0.0