mtyrrell committed
Commit 9a00c34 · 1 Parent(s): fe689aa
Files changed (6)
  1. .DS_Store +0 -0
  2. Dockerfile +22 -0
  3. README.md +29 -5
  4. app/main.py +128 -0
  5. app/prompt.py +7 -0
  6. requirements.txt +5 -0
.DS_Store ADDED
Binary file (6.15 kB)
 
Dockerfile ADDED
@@ -0,0 +1,22 @@
+ # -------- base image --------
+ FROM python:3.11-slim
+
+ ENV PYTHONUNBUFFERED=1 \
+     OMP_NUM_THREADS=1 \
+     TOKENIZERS_PARALLELISM=false
+ #GRADIO_MCP_SERVER=True
+
+ # -------- install deps --------
+ WORKDIR /app
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # -------- copy source --------
+ COPY app ./app
+ COPY model_params.cfg .
+
+ # Ports:
+ #  • 7860 → Gradio UI (HF Spaces standard)
+ EXPOSE 7860
+
+ CMD ["python", "-m", "app.main"]
README.md CHANGED
@@ -1,10 +1,34 @@
  ---
- title: Mcp Mod Test
- emoji: 📉
- colorFrom: pink
- colorTo: blue
+ title: RAG Generation Service
+ emoji: 🤖
+ colorFrom: blue
+ colorTo: purple
  sdk: docker
  pinned: false
+ license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # RAG Generation Service
+
+ This is a Retrieval-Augmented Generation (RAG) service that answers questions based on provided context.
+
+ ## How to use
+
+ 1. Enter your question in the "Query" field
+ 2. Paste relevant documents or context in the "Context" field
+ 3. Click submit to get an AI-generated answer based on your context
+
+ ## Features
+
+ - Uses state-of-the-art language models via the Hugging Face Inference API
+ - Supports multiple model providers
+ - Clean, intuitive interface
+ - Example queries to get started
+
+ ## Configuration
+
+ This Space requires a `HF_TOKEN` environment variable to be set with your Hugging Face access token.
+
+ ## Model Support
+
+ By default, this uses `meta-llama/Meta-Llama-3-8B-Instruct`, but you can configure different models via environment variables.
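As a usage sketch for the README's "How to use" and "Configuration" notes: the Space can also be called programmatically with `gradio_client` (installed separately via `pip install gradio_client`). The Space ID and the `/predict` endpoint name below are assumptions, not values confirmed by this commit; the actual values appear on the Space's "Use via API" page.

```python
# Minimal client-side sketch; the Space ID and api_name are placeholders.
# Check the Space's "Use via API" page for the real values.
from gradio_client import Client

client = Client("your-username/rag-generation-service")  # hypothetical Space ID
answer = client.predict(
    "Who is the CEO?",                                          # Query field
    "Company ABC was founded in 2020. The CEO is Jane Smith.",  # Context field
    api_name="/predict",
)
print(answer)
```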
app/main.py ADDED
@@ -0,0 +1,128 @@
+ import os, asyncio, logging
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+ from .prompt import build_prompt
+
+ # ---------------------------------------------------------------------
+ # model / client initialisation
+ # ---------------------------------------------------------------------
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
+ MAX_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
+ TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
+
+ if not HF_TOKEN:
+     raise RuntimeError(
+         "HF_TOKEN env-var missing."
+     )
+
+ client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+
+ # ---------------------------------------------------------------------
+ # Core generation function for both Gradio UI and MCP
+ # ---------------------------------------------------------------------
+ async def _call_llm(prompt: str) -> str:
+     """
+     Try text_generation first (for models/providers that still support it);
+     fall back to chat_completion when the provider is chat-only (Novita, etc.).
+     """
+     try:
+         # hf-inference
+         return await asyncio.to_thread(
+             client.text_generation,
+             prompt,
+             max_new_tokens=MAX_TOKENS,
+             temperature=TEMPERATURE,
+         )
+     except ValueError as e:
+         if "Supported task: conversational" not in str(e):
+             raise  # genuine error → bubble up
+
+         # fallback for Novita
+         messages = [{"role": "user", "content": prompt}]
+         completion = await asyncio.to_thread(
+             client.chat_completion,
+             messages=messages,
+             model=MODEL_ID,
+             max_tokens=MAX_TOKENS,
+             temperature=TEMPERATURE,
+         )
+         return completion.choices[0].message.content.strip()
+
+ async def rag_generate(query: str, context: str) -> str:
+     """
+     Generate an answer to a query using provided context through RAG.
+
+     This function takes a user query and relevant context, then uses a language model
+     to generate a comprehensive answer based on the provided information.
+
+     Args:
+         query (str): The user's question or query
+         context (str): The relevant context/documents to use for answering
+
+     Returns:
+         str: The generated answer based on the query and context
+     """
+     if not query.strip():
+         return "Error: Query cannot be empty"
+
+     if not context.strip():
+         return "Error: Context cannot be empty"
+
+     prompt = build_prompt(query, context)
+     try:
+         answer = await _call_llm(prompt)
+         return answer
+     except Exception as e:
+         logging.exception("Generation failed")
+         return f"Error: {str(e)}"
+
+ # ---------------------------------------------------------------------
+ # Gradio Interface with MCP support
+ # ---------------------------------------------------------------------
+ ui = gr.Interface(
+     fn=rag_generate,
+     inputs=[
+         gr.Textbox(
+             label="Query",
+             lines=2,
+             placeholder="What would you like to know?",
+             info="Enter your question here"
+         ),
+         gr.Textbox(
+             label="Context",
+             lines=8,
+             placeholder="Paste relevant documents or context here...",
+             info="Provide the context/documents to use for answering"
+         ),
+     ],
+     outputs=gr.Textbox(
+         label="Generated Answer",
+         lines=6,
+         show_copy_button=True
+     ),
+     title="RAG Generation Service",
+     description="Ask questions and get answers based on your provided context. This service is also available as an MCP server for integration with AI applications.",
+     examples=[
+         [
+             "What is the main benefit mentioned?",
+             "Machine learning has revolutionized many industries. The main benefit is increased efficiency and accuracy in data processing."
+         ],
+         [
+             "Who is the CEO?",
+             "Company ABC was founded in 2020. The current CEO is Jane Smith, who has led the company to significant growth."
+         ]
+     ]
+ )
+
+ # Launch with MCP server enabled
+ if __name__ == "__main__":
+     ui.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         mcp_server=True,
+         show_error=True
+     )
+
+
+
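For a quick check of `rag_generate` (and the text_generation → chat_completion fallback) outside the Gradio UI, a minimal local smoke test might look like the sketch below. It assumes `HF_TOKEN` (and optionally `MODEL_ID`) is exported, since `app.main` raises a `RuntimeError` at import time without it, and that it is run from the repository root. Recent Gradio versions also expose the same function as an MCP tool once `mcp_server=True` is set.

```python
# Local smoke test, assuming HF_TOKEN is set in the environment and the working
# directory is the repository root (so "app" is importable as a package).
import asyncio

from app.main import rag_generate  # raises RuntimeError at import if HF_TOKEN is missing

answer = asyncio.run(
    rag_generate(
        "What is the main benefit mentioned?",
        "Machine learning has revolutionized many industries. "
        "The main benefit is increased efficiency and accuracy in data processing.",
    )
)
print(answer)
```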
app/prompt.py ADDED
@@ -0,0 +1,7 @@
+ def build_prompt(question: str, context: str) -> str:
+     return (
+         "You are an expert assistant. Answer the USER question using only the "
+         "CONTEXT provided. If the context is insufficient, say 'I don't know.'\n\n"
+         f"### CONTEXT\n{context}\n\n"
+         f"### USER QUESTION\n{question}\n\n### ASSISTANT ANSWER\n"
+     )
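To illustrate the template, calling `build_prompt` directly yields the instruction header followed by the three sections the model consumes; a rough sketch (output abbreviated in the comments):

```python
from app.prompt import build_prompt

prompt = build_prompt(
    "Who is the CEO?",
    "Company ABC was founded in 2020. The CEO is Jane Smith.",
)
print(prompt)
# You are an expert assistant. Answer the USER question using only the CONTEXT ...
#
# ### CONTEXT
# Company ABC was founded in 2020. The CEO is Jane Smith.
#
# ### USER QUESTION
# Who is the CEO?
#
# ### ASSISTANT ANSWER
```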
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi
+ gradio[mcp]>=4.26.0
+ huggingface_hub>=0.32.6
+ pydantic>=2
+ uvicorn[standard]