test

Files changed:
- .DS_Store +0 -0
- Dockerfile +22 -0
- README.md +29 -5
- app/main.py +128 -0
- app/prompt.py +7 -0
- requirements.txt +5 -0
.DS_Store
ADDED — binary file (6.15 kB)
Dockerfile
ADDED
@@ -0,0 +1,22 @@
```dockerfile
# -------- base image --------
FROM python:3.11-slim

ENV PYTHONUNBUFFERED=1 \
    OMP_NUM_THREADS=1 \
    TOKENIZERS_PARALLELISM=false
#GRADIO_MCP_SERVER=True

# -------- install deps --------
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# -------- copy source --------
COPY app ./app
COPY model_params.cfg .

# Ports:
# • 7860 → Gradio UI (HF Spaces standard)
EXPOSE 7860

CMD ["python", "-m", "app.main"]
```
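For a quick local check of the container, a minimal sketch (assuming the image has been built and started with port 7860 published to `localhost` and `HF_TOKEN` passed through to the container):

```python
# Sketch: confirm a locally running container answers on the exposed port.
# Assumes the container maps port 7860 to localhost:7860 and was started
# with HF_TOKEN in its environment; uses only the Python standard library.
import urllib.request

with urllib.request.urlopen("http://localhost:7860", timeout=10) as resp:
    print(resp.status)  # expect 200 once the Gradio app has finished starting
```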
README.md
CHANGED
@@ -1,10 +1,34 @@
```diff
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: RAG Generation Service
+emoji: 🤖
+colorFrom: blue
+colorTo: purple
 sdk: docker
 pinned: false
+license: mit
 ---
 
-
+# RAG Generation Service
+
+This is a Retrieval-Augmented Generation (RAG) service that answers questions based on provided context.
+
+## How to use
+
+1. Enter your question in the "Query" field
+2. Paste relevant documents or context in the "Context" field
+3. Click submit to get an AI-generated answer based on your context
+
+## Features
+
+- Uses state-of-the-art language models via Hugging Face Inference API
+- Supports multiple model providers
+- Clean, intuitive interface
+- Example queries to get started
+
+## Configuration
+
+This Space requires a `HF_TOKEN` environment variable to be set with your Hugging Face access token.
+
+## Model Support
+
+By default, this uses `meta-llama/Meta-Llama-3-8B-Instruct`, but you can configure different models via environment variables.
```
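The "How to use" flow described in the README can also be driven programmatically via Gradio's client library. A minimal sketch — the Space id below is hypothetical, and it assumes the `gr.Interface` keeps Gradio's default `/predict` API name:

```python
# Sketch: call the Space's Gradio endpoint from Python instead of the UI.
# Assumptions: "your-username/rag-generation-service" is a placeholder Space id,
# and the interface exposes the default "/predict" api_name.
from gradio_client import Client

client = Client("your-username/rag-generation-service")  # hypothetical Space id
answer = client.predict(
    "Who is the CEO?",                                                    # Query
    "Company ABC was founded in 2020. The current CEO is Jane Smith.",    # Context
    api_name="/predict",
)
print(answer)
```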
app/main.py
ADDED
@@ -0,0 +1,128 @@
```python
import os, asyncio, logging
import gradio as gr
from huggingface_hub import InferenceClient
from .prompt import build_prompt

# ---------------------------------------------------------------------
# model / client initialisation
# ---------------------------------------------------------------------
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
MAX_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))

if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN env-var missing. "
    )

client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)

# ---------------------------------------------------------------------
# Core generation function for both Gradio UI and MCP
# ---------------------------------------------------------------------
async def _call_llm(prompt: str) -> str:
    """
    Try text_generation first (for models/providers that still support it);
    fall back to chat_completion when the provider is chat-only (Novita, etc.).
    """
    try:
        # hf-inference
        return await asyncio.to_thread(
            client.text_generation,
            prompt,
            max_new_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
    except ValueError as e:
        if "Supported task: conversational" not in str(e):
            raise  # genuine error → bubble up

        # fallback for Novita
        messages = [{"role": "user", "content": prompt}]
        completion = await asyncio.to_thread(
            client.chat_completion,
            messages=messages,
            model=MODEL_ID,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        return completion.choices[0].message.content.strip()

async def rag_generate(query: str, context: str) -> str:
    """
    Generate an answer to a query using provided context through RAG.

    This function takes a user query and relevant context, then uses a language model
    to generate a comprehensive answer based on the provided information.

    Args:
        query (str): The user's question or query
        context (str): The relevant context/documents to use for answering

    Returns:
        str: The generated answer based on the query and context
    """
    if not query.strip():
        return "Error: Query cannot be empty"

    if not context.strip():
        return "Error: Context cannot be empty"

    prompt = build_prompt(query, context)
    try:
        answer = await _call_llm(prompt)
        return answer
    except Exception as e:
        logging.exception("Generation failed")
        return f"Error: {str(e)}"

# ---------------------------------------------------------------------
# Gradio Interface with MCP support
# ---------------------------------------------------------------------
ui = gr.Interface(
    fn=rag_generate,
    inputs=[
        gr.Textbox(
            label="Query",
            lines=2,
            placeholder="What would you like to know?",
            info="Enter your question here"
        ),
        gr.Textbox(
            label="Context",
            lines=8,
            placeholder="Paste relevant documents or context here...",
            info="Provide the context/documents to use for answering"
        ),
    ],
    outputs=gr.Textbox(
        label="Generated Answer",
        lines=6,
        show_copy_button=True
    ),
    title="RAG Generation Service",
    description="Ask questions and get answers based on your provided context. This service is also available as an MCP server for integration with AI applications.",
    examples=[
        [
            "What is the main benefit mentioned?",
            "Machine learning has revolutionized many industries. The main benefit is increased efficiency and accuracy in data processing."
        ],
        [
            "Who is the CEO?",
            "Company ABC was founded in 2020. The current CEO is Jane Smith, who has led the company to significant growth."
        ]
    ]
)

# Launch with MCP server enabled
if __name__ == "__main__":
    ui.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
        show_error=True
    )
```
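To exercise `rag_generate` outside the Gradio UI, a small driver script is enough — a sketch that assumes `HF_TOKEN` is exported in the environment (module import raises otherwise) and that it is run from the repository root so `app` imports as a package:

```python
# smoke_test.py — a sketch for calling rag_generate() directly, without the UI.
# Assumptions: HF_TOKEN is set in the environment, and this file sits at the
# repository root so `app.main` resolves as a package import.
import asyncio

from app.main import rag_generate


async def main() -> None:
    answer = await rag_generate(
        query="What is the main benefit mentioned?",
        context=(
            "Machine learning has revolutionized many industries. "
            "The main benefit is increased efficiency and accuracy in data processing."
        ),
    )
    print(answer)


if __name__ == "__main__":
    asyncio.run(main())
```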
app/prompt.py
ADDED
@@ -0,0 +1,7 @@
```python
def build_prompt(question: str, context: str) -> str:
    return (
        "You are an expert assistant. Answer the USER question using only the "
        "CONTEXT provided. If the context is insufficient say 'I don't know.'.\n\n"
        f"### CONTEXT\n{context}\n\n"
        f"### USER QUESTION\n{question}\n\n### ASSISTANT ANSWER\n"
    )
```
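For reference, a short sketch of what `build_prompt` renders for one of the README examples (the question and context strings here are just the sample values from `app/main.py`):

```python
# Sketch: print the prompt that build_prompt() produces for a sample query.
from app.prompt import build_prompt

prompt = build_prompt(
    question="Who is the CEO?",
    context="Company ABC was founded in 2020. The current CEO is Jane Smith.",
)
print(prompt)
# Expected shape (instruction text abbreviated):
#   You are an expert assistant. Answer the USER question using only the CONTEXT ...
#   ### CONTEXT
#   Company ABC was founded in 2020. The current CEO is Jane Smith.
#   ### USER QUESTION
#   Who is the CEO?
#   ### ASSISTANT ANSWER
```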
requirements.txt
ADDED
@@ -0,0 +1,5 @@
```text
fastapi
gradio[mcp]>=4.26.0
huggingface_hub>=0.32.6
pydantic>=2
uvicorn[standard]
```