test

Files changed:
- .DS_Store +0 -0
- Dockerfile +22 -0
- README.md +29 -5
- app/main.py +128 -0
- app/prompt.py +7 -0
- requirements.txt +5 -0
.DS_Store
ADDED — binary file (6.15 kB)
Dockerfile
ADDED
@@ -0,0 +1,22 @@
```dockerfile
# -------- base image --------
FROM python:3.11-slim

ENV PYTHONUNBUFFERED=1 \
    OMP_NUM_THREADS=1 \
    TOKENIZERS_PARALLELISM=false
#GRADIO_MCP_SERVER=True

# -------- install deps --------
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# -------- copy source --------
COPY app ./app
COPY model_params.cfg .

# Ports:
# • 7860 → Gradio UI (HF Spaces standard)
EXPOSE 7860

CMD ["python", "-m", "app.main"]
```
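For a quick local check of the container, a minimal sketch (assuming the image has been built and started with port 7860 published to `localhost` and `HF_TOKEN` passed through to the container):

```python
# Sketch: confirm a locally running container answers on the exposed port.
# Assumes the container maps port 7860 to localhost:7860 and was started
# with HF_TOKEN in its environment; uses only the Python standard library.
import urllib.request

with urllib.request.urlopen("http://localhost:7860", timeout=10) as resp:
    print(resp.status)  # expect 200 once the Gradio app has finished starting
```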
README.md
CHANGED
@@ -1,10 +1,34 @@
```diff
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: RAG Generation Service
+emoji: 🤖
+colorFrom: blue
+colorTo: purple
 sdk: docker
 pinned: false
+license: mit
 ---
 
-
+# RAG Generation Service
+
+This is a Retrieval-Augmented Generation (RAG) service that answers questions based on provided context.
+
+## How to use
+
+1. Enter your question in the "Query" field
+2. Paste relevant documents or context in the "Context" field
+3. Click submit to get an AI-generated answer based on your context
+
+## Features
+
+- Uses state-of-the-art language models via Hugging Face Inference API
+- Supports multiple model providers
+- Clean, intuitive interface
+- Example queries to get started
+
+## Configuration
+
+This Space requires a `HF_TOKEN` environment variable to be set with your Hugging Face access token.
+
+## Model Support
+
+By default, this uses `meta-llama/Meta-Llama-3-8B-Instruct`, but you can configure different models via environment variables.
```
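The "How to use" flow described in the README can also be driven programmatically via Gradio's client library. A minimal sketch — the Space id below is hypothetical, and it assumes the `gr.Interface` keeps Gradio's default `/predict` API name:

```python
# Sketch: call the Space's Gradio endpoint from Python instead of the UI.
# Assumptions: "your-username/rag-generation-service" is a placeholder Space id,
# and the interface exposes the default "/predict" api_name.
from gradio_client import Client

client = Client("your-username/rag-generation-service")  # hypothetical Space id
answer = client.predict(
    "Who is the CEO?",                                                    # Query
    "Company ABC was founded in 2020. The current CEO is Jane Smith.",    # Context
    api_name="/predict",
)
print(answer)
```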
app/main.py
ADDED
@@ -0,0 +1,128 @@
```python
import os, asyncio, logging
import gradio as gr
from huggingface_hub import InferenceClient
from .prompt import build_prompt

# ---------------------------------------------------------------------
# model / client initialisation
# ---------------------------------------------------------------------
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
MAX_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))

if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN env-var missing. "
    )

client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)

# ---------------------------------------------------------------------
# Core generation function for both Gradio UI and MCP
# ---------------------------------------------------------------------
async def _call_llm(prompt: str) -> str:
    """
    Try text_generation first (for models/providers that still support it);
    fall back to chat_completion when the provider is chat-only (Novita, etc.).
    """
    try:
        # hf-inference
        return await asyncio.to_thread(
            client.text_generation,
            prompt,
            max_new_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
    except ValueError as e:
        if "Supported task: conversational" not in str(e):
            raise  # genuine error → bubble up

        # fallback for Novita
        messages = [{"role": "user", "content": prompt}]
        completion = await asyncio.to_thread(
            client.chat_completion,
            messages=messages,
            model=MODEL_ID,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        return completion.choices[0].message.content.strip()

async def rag_generate(query: str, context: str) -> str:
    """
    Generate an answer to a query using provided context through RAG.

    This function takes a user query and relevant context, then uses a language model
    to generate a comprehensive answer based on the provided information.

    Args:
        query (str): The user's question or query
        context (str): The relevant context/documents to use for answering

    Returns:
        str: The generated answer based on the query and context
    """
    if not query.strip():
        return "Error: Query cannot be empty"

    if not context.strip():
        return "Error: Context cannot be empty"

    prompt = build_prompt(query, context)
    try:
        answer = await _call_llm(prompt)
        return answer
    except Exception as e:
        logging.exception("Generation failed")
        return f"Error: {str(e)}"

# ---------------------------------------------------------------------
# Gradio Interface with MCP support
# ---------------------------------------------------------------------
ui = gr.Interface(
    fn=rag_generate,
    inputs=[
        gr.Textbox(
            label="Query",
            lines=2,
            placeholder="What would you like to know?",
            info="Enter your question here"
        ),
        gr.Textbox(
            label="Context",
            lines=8,
            placeholder="Paste relevant documents or context here...",
            info="Provide the context/documents to use for answering"
        ),
    ],
    outputs=gr.Textbox(
        label="Generated Answer",
        lines=6,
        show_copy_button=True
    ),
    title="RAG Generation Service",
    description="Ask questions and get answers based on your provided context. This service is also available as an MCP server for integration with AI applications.",
    examples=[
        [
            "What is the main benefit mentioned?",
            "Machine learning has revolutionized many industries. The main benefit is increased efficiency and accuracy in data processing."
        ],
        [
            "Who is the CEO?",
            "Company ABC was founded in 2020. The current CEO is Jane Smith, who has led the company to significant growth."
        ]
    ]
)

# Launch with MCP server enabled
if __name__ == "__main__":
    ui.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
        show_error=True
    )
```
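To exercise `rag_generate` outside the Gradio UI, a small driver script is enough — a sketch that assumes `HF_TOKEN` is exported in the environment (module import raises otherwise) and that it is run from the repository root so `app` imports as a package:

```python
# smoke_test.py — a sketch for calling rag_generate() directly, without the UI.
# Assumptions: HF_TOKEN is set in the environment, and this file sits at the
# repository root so `app.main` resolves as a package import.
import asyncio

from app.main import rag_generate


async def main() -> None:
    answer = await rag_generate(
        query="What is the main benefit mentioned?",
        context=(
            "Machine learning has revolutionized many industries. "
            "The main benefit is increased efficiency and accuracy in data processing."
        ),
    )
    print(answer)


if __name__ == "__main__":
    asyncio.run(main())
```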
app/prompt.py
ADDED
@@ -0,0 +1,7 @@
```python
def build_prompt(question: str, context: str) -> str:
    return (
        "You are an expert assistant. Answer the USER question using only the "
        "CONTEXT provided. If the context is insufficient say 'I don't know.'.\n\n"
        f"### CONTEXT\n{context}\n\n"
        f"### USER QUESTION\n{question}\n\n### ASSISTANT ANSWER\n"
    )
```
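For reference, a short sketch of what `build_prompt` renders for one of the README examples (the question and context strings here are just the sample values from `app/main.py`):

```python
# Sketch: print the prompt that build_prompt() produces for a sample query.
from app.prompt import build_prompt

prompt = build_prompt(
    question="Who is the CEO?",
    context="Company ABC was founded in 2020. The current CEO is Jane Smith.",
)
print(prompt)
# Expected shape (instruction text abbreviated):
#   You are an expert assistant. Answer the USER question using only the CONTEXT ...
#   ### CONTEXT
#   Company ABC was founded in 2020. The current CEO is Jane Smith.
#   ### USER QUESTION
#   Who is the CEO?
#   ### ASSISTANT ANSWER
```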
requirements.txt
ADDED
@@ -0,0 +1,5 @@
```text
fastapi
gradio[mcp]>=4.26.0
huggingface_hub>=0.32.6
pydantic>=2
uvicorn[standard]
```