Fix examples formatting and remove RAG functionality
- Fixed examples processing in the generate_zip function to use repr() instead of json.dumps() (see the sketch after this list)
- This ensures examples are properly formatted as Python literals for gr.ChatInterface
- Updated theme specification from gr.themes.Default() to 'default' string
- Removed RAG functionality files: rag_tool.py and vector_store.py
- Enhanced support_docs.py with placeholder comments for future image integration
- Updated export_conversation_to_markdown to include configuration metadata
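
The repr() change is easiest to see side by side. Below is a minimal, hypothetical sketch (variable names are illustrative, not from the repo) of why repr() is the safer serializer when interpolating the examples list into generated Python source for gr.ChatInterface:

```python
import json

# Illustrative examples list; None stands in for an empty example slot.
examples = ["What's osmosis?", None]

# json.dumps() emits a JSON literal -- "null" is not valid Python,
# so pasting this into a generated app.py would fail at runtime.
print(json.dumps(examples))  # ["What's osmosis?", null]

# repr() emits a valid Python literal that can be embedded directly.
print(repr(examples))        # ["What's osmosis?", None]

# The generator can then write the line into app.py verbatim:
generated_line = f"    examples={repr(examples)},"
```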
- app.py +1 -1
- rag_tool.py +0 -208
- support_docs.py +290 -116
- vector_store.py +0 -308
app.py
CHANGED
@@ -1603,7 +1603,7 @@ with gr.Blocks(
         border-radius: 6px;
     }
     """,
-    theme=gr.themes.Default(),
+    theme="default",
     head="""
     <style>
     /* Additional head styles to prevent manifest issues */
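
Both spellings select the same built-in theme; the string form just lets Gradio resolve the name at launch instead of constructing a theme object in the generated file. A minimal sketch of the equivalence (illustrative only):

```python
import gradio as gr

# String form used after this commit; Gradio maps "default" to the built-in theme.
with gr.Blocks(theme="default") as demo:
    gr.Markdown("Hello")

# Object form removed by this commit -- equivalent, but constructs the theme eagerly:
# with gr.Blocks(theme=gr.themes.Default()) as demo:
#     ...
```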
rag_tool.py
DELETED
@@ -1,208 +0,0 @@
-import json
-from typing import List, Dict, Any, Optional, Tuple
-from document_processor import DocumentProcessor, DocumentChunk
-from vector_store import VectorStore, SearchResult
-import os
-import tempfile
-from pathlib import Path
-
-
-class RAGTool:
-    """RAG tool for integrating document search with chat"""
-
-    def __init__(self):
-        self.processor = DocumentProcessor(chunk_size=800, chunk_overlap=100)
-        self.vector_store = VectorStore()
-        self.processed_files = []
-        self.total_chunks = 0
-
-    def process_uploaded_files(self, file_paths: List[str]) -> Dict[str, Any]:
-        """Process uploaded files and build vector index"""
-
-        # Validate files
-        valid_files = []
-        errors = []
-
-        for file_path in file_paths:
-            try:
-                # Check file size (10MB limit)
-                size_mb = os.path.getsize(file_path) / (1024 * 1024)
-                if size_mb > 10:
-                    errors.append({
-                        'file': Path(file_path).name,
-                        'error': f'File too large ({size_mb:.1f}MB). Maximum size is 10MB.'
-                    })
-                    continue
-
-                valid_files.append(file_path)
-
-            except Exception as e:
-                errors.append({
-                    'file': Path(file_path).name,
-                    'error': str(e)
-                })
-
-        if not valid_files:
-            return {
-                'success': False,
-                'message': 'No valid files to process',
-                'errors': errors
-            }
-
-        # Process files
-        all_chunks, summary = self.processor.process_multiple_files(valid_files)
-
-        if not all_chunks:
-            return {
-                'success': False,
-                'message': 'No content extracted from files',
-                'summary': summary
-            }
-
-        # Build vector index
-        chunk_dicts = [chunk.to_dict() for chunk in all_chunks]
-        self.vector_store.build_index(chunk_dicts, show_progress=False)
-
-        # Update stats
-        self.processed_files = summary['files_processed']
-        self.total_chunks = len(all_chunks)
-
-        # Calculate index size
-        index_stats = self.vector_store.get_stats()
-
-        return {
-            'success': True,
-            'message': f'Successfully processed {len(valid_files)} files into {self.total_chunks} chunks',
-            'summary': summary,
-            'index_stats': index_stats,
-            'errors': errors
-        }
-
-    def get_relevant_context(self, query: str, max_chunks: int = 3) -> str:
-        """Get relevant context for a query"""
-        if not self.vector_store.index:
-            return ""
-
-        # Search for relevant chunks
-        results = self.vector_store.search(
-            query=query,
-            top_k=max_chunks,
-            score_threshold=0.3
-        )
-
-        if not results:
-            return ""
-
-        # Format context
-        context_parts = []
-
-        for i, result in enumerate(results, 1):
-            file_name = result.metadata.get('file_name', 'Unknown')
-            context_parts.append(
-                f"[Document: {file_name} - Relevance: {result.score:.2f}]\n{result.text}"
-            )
-
-        return "\n\n".join(context_parts)
-
-    def get_serialized_data(self) -> Dict[str, Any]:
-        """Get serialized data for deployment"""
-        if not self.vector_store.index:
-            return None
-
-        return self.vector_store.serialize()
-
-    def get_deployment_info(self) -> Dict[str, Any]:
-        """Get information for deployment package"""
-        if not self.vector_store.index:
-            return {
-                'enabled': False,
-                'message': 'No documents processed'
-            }
-
-        # Estimate package size increase
-        index_stats = self.vector_store.get_stats()
-        estimated_size_mb = (
-            # Index size estimation
-            (index_stats['total_chunks'] * index_stats['dimension'] * 4) / (1024 * 1024) +
-            # Chunks text size estimation
-            (sum(len(chunk['text']) for chunk in self.vector_store.chunks.values()) / (1024 * 1024))
-        ) * 1.5  # Add overhead for base64 encoding
-
-        return {
-            'enabled': True,
-            'total_files': len(self.processed_files),
-            'total_chunks': self.total_chunks,
-            'estimated_size_mb': round(estimated_size_mb, 2),
-            'files': [f['name'] for f in self.processed_files]
-        }
-
-
-def create_rag_module_for_space(serialized_data: Dict[str, Any]) -> str:
-    """Create a minimal RAG module for the deployed space"""
-
-    return '''# RAG Module for deployed space
-import numpy as np
-import faiss
-import base64
-import json
-
-class RAGContext:
-    def __init__(self, serialized_data):
-        # Deserialize FAISS index
-        index_bytes = base64.b64decode(serialized_data['index_base64'])
-        self.index = faiss.deserialize_index(index_bytes)
-
-        # Restore chunks and mappings
-        self.chunks = serialized_data['chunks']
-        self.chunk_ids = serialized_data['chunk_ids']
-
-    def get_context(self, query_embedding, max_chunks=3):
-        """Get relevant context using pre-computed embedding"""
-        if not self.index:
-            return ""
-
-        # Normalize and search
-        faiss.normalize_L2(query_embedding)
-        scores, indices = self.index.search(query_embedding, max_chunks)
-
-        # Format results
-        context_parts = []
-
-        for score, idx in zip(scores[0], indices[0]):
-            if idx < 0 or score < 0.3:
-                continue
-
-            chunk = self.chunks[self.chunk_ids[idx]]
-            file_name = chunk.get('metadata', {}).get('file_name', 'Document')
-
-            context_parts.append(
-                f"[{file_name} - Relevance: {score:.2f}]\\n{chunk['text']}"
-            )
-
-        return "\\n\\n".join(context_parts) if context_parts else ""
-
-# Initialize RAG context
-RAG_DATA = json.loads(\'\'\'{{rag_data_json}}\'\'\')
-rag_context = RAGContext(RAG_DATA) if RAG_DATA else None
-
-def get_rag_context(query):
-    """Get relevant context for a query"""
-    if not rag_context:
-        return ""
-
-    # In production, you'd compute query embedding here
-    # For now, return empty (would need embedding service)
-    return ""
-'''
-
-
-def format_context_for_prompt(context: str, query: str) -> str:
-    """Format RAG context for inclusion in prompt"""
-    if not context:
-        return ""
-
-    return f"""Relevant context from uploaded documents:
-
-{context}
-
-Please use the above context to help answer the user's question: {query}"""
support_docs.py
CHANGED
@@ -1,5 +1,16 @@
 """
 Support documentation module with accordion-style help sections
+
+IMAGE PLACEHOLDERS TO REPLACE:
+1. interface_overview.png - Three-tab interface overview (Getting Started)
+2. configuration_tab.png - Configuration tab screenshot (Space Settings)
+3. system_prompt_interface.png - System prompt and template interface (Assistant Configuration)
+4. preview_tab.png - Preview tab with chat interface (Preview Tab Usage)
+5. example_prompts.png - Example prompts input and display (Example Prompts)
+6. tool_settings_diagram.png - Configuration structure diagram (Tool Settings)
+7. advanced_settings.png - Temperature and token controls (Advanced Settings)
+8. deployment_process.png - Deployment process flow diagram (Deployment Process)
+9. secret.png - HuggingFace secret configuration (already exists)
 """
 
 import gradio as gr
@@ -11,25 +22,39 @@ def create_support_docs():
 
     with gr.Column():
         gr.Markdown("# Support Documentation")
+        gr.Markdown("*Under construction - images coming soon!*")
         gr.Markdown("Complete step-by-step guidance for creating and deploying chat interfaces with HuggingFace Spaces.")
 
         with gr.Accordion("🚀 Getting Started", open=True):
            gr.Markdown("""
            ### Quick Start Guide
+           This guide helps you set up your first chat interface on HuggingFace Spaces using Gradio. Follow these steps to create a functional assistant with URL grounding and example prompts.
+           """)
 
+           # TODO: Add interface overview screenshot
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="interface_overview.png",  # Placeholder for interface overview screenshot
+                       label="Three-Tab Interface: Configuration, Preview, and Support",
+                       show_label=True,
+                       interactive=False,
+                       width=600,
+                       height=400,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Workflow Steps:**
+                   1. **Configure** your space settings
+                   2. **Set up** assistant with system prompt
+                   3. **Enable** tools like URL grounding
+                   4. **Preview** & test your configuration
+                   5. **Generate** deployment package
+                   6. **Deploy** to HuggingFace
+                   """)
 
+           gr.Markdown("""
            **Prerequisites:**
            - HuggingFace account (free at huggingface.co)
            - OpenRouter API key (get at openrouter.ai/keys)
@@ -39,58 +64,85 @@ def create_support_docs():
        with gr.Accordion("⚙️ Space Settings", open=False):
            gr.Markdown("""
            ### Space Configuration Fields
+           """)
 
+           # TODO: Add configuration tab screenshot
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="configuration_tab.png",  # Placeholder for configuration tab screenshot
+                       label="Configuration Tab Interface",
+                       show_label=True,
+                       interactive=False,
+                       width=500,
+                       height=400,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Key Configuration Fields:**
+
+                   **Space Title** - Display name for your assistant
+
+                   **Space Description** - Brief explanation of purpose
+
+                   **Model Selection** - Choose from optimized models:
+                   - **Gemma 3 27B** - Open-source, sustainable
+                   - **Gemini 2.0 Flash** - Fast, reliable
+                   - **Claude 3.5 Haiku** - Complex reasoning
+
+                   **API Key Variable** - Default: `OPENROUTER_API_KEY`
+
+                   **Access Code** - Optional student protection
+                   """)
 
+           gr.Markdown("""
+           **Model Comparison:**
            - **[google/gemma-3-27b-it](https://openrouter.ai/models/google/gemma-3-27b-it)**: Open-source, sustainable option with excellent performance
            - **[google/gemini-2.0-flash-001](https://openrouter.ai/models/google/gemini-2.0-flash-001)**: Fast, reliable, good for general tasks
-           - **[mistralai/mistral-medium](https://openrouter.ai/models/mistralai/mistral-medium)**: Good for technical topics
-           - **[openai/gpt-4o-nano](https://openrouter.ai/models/openai/gpt-4o-nano)**: Balanced performance and cost
            - **[anthropic/claude-3.5-haiku](https://openrouter.ai/models/anthropic/claude-3.5-haiku)**: Great for complex reasoning and analysis
-
-           **API Key Variable Name**
-           - Default: `OPENROUTER_API_KEY`
-           - This is the secret name you'll create in HuggingFace Space settings
-           - Only change if you have specific naming requirements
-
-           **Access Code (Optional)**
-           - Leave empty for public access
-           - Set a code to restrict access to students/specific users
-           - Code is stored securely as an environment variable
            """)
 
-       with gr.Accordion("🤖
+       with gr.Accordion("🤖 Space Configuration", open=False):
            gr.Markdown("""
            ### System Prompt Design
+           """)
 
+           # TODO: Add system prompt interface screenshot
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="system_prompt_interface.png",  # Placeholder for system prompt interface
+                       label="System Prompt Configuration Interface",
+                       show_label=True,
+                       interactive=False,
+                       width=500,
+                       height=300,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Template System:**
+
+                   **Research Template** - Pre-configured for academic use
+                   - MLA citation formatting
+                   - Fact-checking emphasis
+                   - Auto-enables URL fetching
+
+                   **Socratic Template** - Pedagogical questioning
+                   - Constructivist learning approach
+                   - Critical thinking focus
+
+                   **Custom Prompt** - Build your own
+                   """)
 
+           gr.Markdown("""
            **Best Practices:**
            - Be specific about the assistant's role and purpose
            - Include behavioral guidelines and constraints
            - Mention the intended audience (students, researchers, etc.)
            - List key capabilities and tasks
 
-           **Research Template**
-           - Pre-configured for academic research assistance
-           - Includes MLA citation formatting
-           - Emphasizes fact-checking and evidence-based responses
-           - Automatically enables dynamic URL fetching
-
-           **Custom Categories**
-           - Break down your system prompt into structured sections:
-           - **Role and Purpose**: What is the assistant and what does it do?
-           - **Intended Audience**: Who will use this assistant?
-           - **Key Tasks**: What specific capabilities should it have?
-           - **Additional Context**: Extra instructions or constraints
-
            ### Copy-Pasteable System Prompts
 
            **Biology Course Assistant:**
@@ -127,9 +179,36 @@ def create_support_docs():
        with gr.Accordion("🔬 Preview Tab Usage", open=False):
            gr.Markdown("""
            ### Testing Your Assistant Before Deployment
+           """)
 
+           # TODO: Add preview tab screenshot
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="preview_tab.png",  # Placeholder for preview tab screenshot
+                       label="Preview Tab with Active Chat Interface",
+                       show_label=True,
+                       interactive=False,
+                       width=500,
+                       height=400,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Preview Features:**
+
+                   **Real API Integration** - Uses actual OpenRouter API
+
+                   **Configuration Display** - Shows current setup
+
+                   **URL Testing** - Add up to 4 URLs for context
+
+                   **Chat Export** - Download conversation logs
+
+                   **Clear Function** - Reset chat history
+                   """)
 
+           gr.Markdown("""
            **How to Use the Preview:**
            1. **Configure First**: Complete your setup in the Configuration tab
            2. **Click Preview Button**: Use "Preview Deployment Package" to activate the preview
@@ -137,18 +216,9 @@ def create_support_docs():
            4. **Test URL Context**: Add URLs to test grounding functionality
            5. **Export Conversations**: Save chat logs for analysis
 
-           **Preview Features:**
-           - **Real API Integration**: Uses actual OpenRouter API when `OPENROUTER_API_KEY` is set
-           - **Configuration Display**: Shows your current assistant setup
-           - **URL Testing**: Add up to 4 URLs for context testing
-           - **Dynamic URL Management**: Add/remove URL fields as needed
-           - **Chat Export**: Download conversation logs as markdown files
-           - **Clear Function**: Reset chat history for new tests
-
            **Preview Requirements:**
            - Set `OPENROUTER_API_KEY` environment variable for real API testing
            - Without API key: Shows configuration but no actual chat responses
-           - All other features (URL fetching, configuration) work without API key
 
            **Testing Best Practices:**
            - Test different types of queries to validate assistant behavior
@@ -160,20 +230,36 @@ def create_support_docs():
        with gr.Accordion("💬 Example Prompts", open=False):
            gr.Markdown("""
            ### Creating Effective Example Prompts
+           """)
 
+           # TODO: Add example prompts interface screenshot
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="example_prompts.png",  # Placeholder for example prompts interface
+                       label="Example Prompts Interface and Display",
+                       show_label=True,
+                       interactive=False,
+                       width=500,
+                       height=300,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Guidelines:**
+
+                   **Format** - One prompt per line
+
+                   **Length** - Under 100 characters each
+
+                   **Content** - Show assistant capabilities
+
+                   **Language** - Natural and conversational
+
+                   **URLs** - Include if assistant processes them
+                   """)
 
+           gr.Markdown("""
            **Examples by Use Case:**
 
            **Course Assistant:**
@@ -201,40 +287,42 @@ def create_support_docs():
        with gr.Accordion("🔧 Tool Settings & Configuration", open=False):
            gr.Markdown("""
            ### Configuration Tab Structure
+           """)
 
+           # TODO: Add tool settings diagram
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="tool_settings_diagram.png",  # Placeholder for tool settings structure diagram
+                       label="Configuration Tab Structure and Tool Integration",
+                       show_label=True,
+                       interactive=False,
+                       width=500,
+                       height=400,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Main Configuration Fields:**
+                   - Space Title & Description
+                   - Model Selection
+                   - API Key Variable
+                   - Access Code (optional)
+
+                   **Assistant Configuration:**
+                   - System Prompt
+                   - Template Selection
+                   - URL Grounding (2-4 URLs)
+                   - Example Prompts
+
+                   **Advanced Settings:**
+                   - Temperature (0.0-2.0)
+                   - Max Tokens (50-4096)
+                   """)
 
+           gr.Markdown("""
            ### Tool Integration Options
 
-           **Web Search (crawl4ai)**
-           - Real-time web searching using DuckDuckGo
-           - Advanced content extraction and crawling
-           - Automatically enabled with Research Template
-
-           # Document RAG functionality removed
-
            **URL Grounding (Static Context)**
            - Add 2-4 URLs for consistent context across all responses
            - Content fetched once during generation and cached
@@ -250,20 +338,36 @@ def create_support_docs():
        with gr.Accordion("🎛️ Advanced Settings", open=False):
            gr.Markdown("""
            ### Model Parameters
+           """)
 
+           # TODO: Add advanced settings interface screenshot
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="advanced_settings.png",  # Placeholder for advanced settings interface
+                       label="Advanced Settings: Temperature and Token Controls",
+                       show_label=True,
+                       interactive=False,
+                       width=500,
+                       height=300,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Temperature (0.0 - 2.0)**
+                   - **0.0-0.3**: Very focused, deterministic
+                   - **0.4-0.7**: Balanced (recommended)
+                   - **0.8-1.2**: More creative and varied
+                   - **1.3-2.0**: Highly creative, unpredictable
+
+                   **Max Response Tokens (50-4096)**
+                   - **50-200**: Short, concise answers
+                   - **200-500**: Medium responses (recommended)
+                   - **500-1000**: Longer, detailed explanations
+                   - **1000+**: Extended analysis
+                   """)
 
+           gr.Markdown("""
            **Token Usage Notes:**
            - Tokens include both input (your prompt + context) and output
            - Longer contexts (URLs) use more input tokens
@@ -273,6 +377,37 @@ def create_support_docs():
        with gr.Accordion("🚀 Deployment Process", open=False):
            gr.Markdown("""
            ### Quick Deployment Guide
+           """)
+
+           # TODO: Add deployment process diagram
+           with gr.Row():
+               with gr.Column(scale=1):
+                   gr.Image(
+                       value="deployment_process.png",  # Placeholder for deployment process diagram
+                       label="Complete Deployment Process Flow",
+                       show_label=True,
+                       interactive=False,
+                       width=500,
+                       height=300,
+                       container=False
+                   )
+               with gr.Column(scale=1):
+                   gr.Markdown("""
+                   **Deployment Steps:**
+
+                   **1. Generate Package** - Download zip file
+
+                   **2. Create Space** - New HuggingFace Space (Gradio SDK)
+
+                   **3. Upload Files** - app.py and requirements.txt
+
+                   **4. Add API Key** - Secret configuration
+
+                   **5. Deploy & Test** - Wait for build, then test
+                   """)
+
+           gr.Markdown("""
+           **Detailed Steps:**
 
            **1. Generate & Upload**
            - Click "Generate Deployment Package" → download zip
@@ -355,8 +490,6 @@ def create_support_docs():
            - Check for typos in the access code
            - Case-sensitive matching
 
-           # Document RAG functionality removed
-
            **URLs not fetching content**
            - Check URLs are publicly accessible
            - Some sites block automated requests
@@ -429,17 +562,58 @@ def create_support_docs():
            - Language practice partners
            """)
 
-def export_conversation_to_markdown(conversation_history):
-    """Export conversation history to markdown format"""
+def export_conversation_to_markdown(conversation_history, config_metadata=None):
+    """Export conversation history to markdown format with configuration metadata"""
     if not conversation_history:
        return "No conversation to export."
 
    markdown_content = f"""# Conversation Export
 Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 
 """
+
+    # Add configuration metadata if provided
+    if config_metadata:
+        markdown_content += """## Configuration Information
+
+"""
+
+        # Add basic configuration details
+        if config_metadata.get('name'):
+            markdown_content += f"**Assistant Name:** {config_metadata['name']}\n"
+        if config_metadata.get('description'):
+            markdown_content += f"**Description:** {config_metadata['description']}\n"
+        if config_metadata.get('model'):
+            markdown_content += f"**Model:** {config_metadata['model']}\n"
+        if config_metadata.get('temperature'):
+            markdown_content += f"**Temperature:** {config_metadata['temperature']}\n"
+        if config_metadata.get('max_tokens'):
+            markdown_content += f"**Max Tokens:** {config_metadata['max_tokens']}\n"
+
+        # Add URL grounding information
+        grounding_urls = []
+        for i in range(1, 5):
+            url = config_metadata.get(f'url{i}')
+            if url and url.strip():
+                grounding_urls.append(url.strip())
+
+        if grounding_urls:
+            markdown_content += f"\n**URL Grounding ({len(grounding_urls)} URLs):**\n"
+            for i, url in enumerate(grounding_urls, 1):
+                markdown_content += f"- URL {i}: {url}\n"
+
+        # Add feature flags
+        if config_metadata.get('enable_dynamic_urls'):
+            markdown_content += f"\n**Dynamic URL Fetching:** Enabled\n"
+
+        # Add system prompt
+        if config_metadata.get('system_prompt'):
+            system_prompt = config_metadata['system_prompt']
+            markdown_content += f"\n**System Prompt:**\n```\n{system_prompt}\n```\n"
+
+        markdown_content += "\n---\n\n"
+    else:
+        markdown_content += "---\n\n"
 
    for i, message in enumerate(conversation_history):
        if isinstance(message, dict):
vector_store.py
DELETED
@@ -1,308 +0,0 @@
-import numpy as np
-import pickle
-import base64
-from typing import List, Dict, Any, Tuple, Optional
-import json
-from dataclasses import dataclass
-
-try:
-    from sentence_transformers import SentenceTransformer
-    HAS_SENTENCE_TRANSFORMERS = True
-except ImportError:
-    HAS_SENTENCE_TRANSFORMERS = False
-
-try:
-    import faiss
-    HAS_FAISS = True
-except ImportError:
-    HAS_FAISS = False
-
-
-@dataclass
-class SearchResult:
-    chunk_id: str
-    text: str
-    score: float
-    metadata: Dict[str, Any]
-
-
-class VectorStore:
-    def __init__(self, embedding_model: str = "all-MiniLM-L6-v2"):
-        self.embedding_model_name = embedding_model
-        self.embedding_model = None
-        self.index = None
-        self.chunks = {}  # chunk_id -> chunk data
-        self.chunk_ids = []  # Ordered list for FAISS index mapping
-        self.dimension = 384  # Default for all-MiniLM-L6-v2
-
-        if HAS_SENTENCE_TRANSFORMERS:
-            self._initialize_model()
-
-    def _initialize_model(self):
-        """Initialize the embedding model"""
-        if not HAS_SENTENCE_TRANSFORMERS:
-            raise ImportError("sentence-transformers not installed")
-
-        try:
-            print(f"Loading embedding model: {self.embedding_model_name}")
-            print("This may take a moment on first run as the model downloads...")
-
-            # Set environment variables to prevent multiprocessing issues
-            import os
-            os.environ['TOKENIZERS_PARALLELISM'] = 'false'
-            os.environ['OMP_NUM_THREADS'] = '1'
-            os.environ['MKL_NUM_THREADS'] = '1'
-
-            # Initialize with specific settings to avoid multiprocessing issues
-            self.embedding_model = SentenceTransformer(
-                self.embedding_model_name,
-                device='cpu',  # Force CPU to avoid GPU/multiprocessing conflicts
-                cache_folder=None,  # Use default cache
-                # Additional parameters to reduce memory usage
-                use_auth_token=False,
-                trust_remote_code=False  # Security best practice
-            )
-
-            # Disable multiprocessing for stability in web apps
-            if hasattr(self.embedding_model, 'pool'):
-                self.embedding_model.pool = None
-
-            # Additional stability measures for Gradio environment
-            if hasattr(self.embedding_model, '_modules'):
-                for module in self.embedding_model._modules.values():
-                    if hasattr(module, 'num_workers'):
-                        module.num_workers = 0
-
-            # Update dimension based on model
-            self.dimension = self.embedding_model.get_sentence_embedding_dimension()
-            print(f"Model loaded successfully, dimension: {self.dimension}")
-        except Exception as e:
-            print(f"Failed to initialize embedding model: {e}")
-            # Provide more specific error messages
-            if "connection" in str(e).lower() or "timeout" in str(e).lower():
-                raise RuntimeError(f"Network error downloading model '{self.embedding_model_name}'. "
-                                   f"Please check your internet connection and try again: {e}")
-            elif "memory" in str(e).lower() or "out of memory" in str(e).lower():
-                raise RuntimeError(f"Insufficient memory to load model '{self.embedding_model_name}'. "
-                                   f"Try using a smaller model or increase available memory: {e}")
-            else:
-                raise RuntimeError(f"Could not load embedding model '{self.embedding_model_name}': {e}")
-
-    def create_embeddings(self, texts: List[str], batch_size: int = 8) -> np.ndarray:
-        """Create embeddings for a list of texts"""
-        if not self.embedding_model:
-            self._initialize_model()
-
-        # Use smaller batch size for stability
-        embeddings = []
-
-        try:
-            print(f"Creating embeddings for {len(texts)} text chunks...")
-            for i in range(0, len(texts), batch_size):
-                batch = texts[i:i + batch_size]
-                print(f"Processing batch {i//batch_size + 1}/{(len(texts) + batch_size - 1)//batch_size}")
-
-                batch_embeddings = self.embedding_model.encode(
-                    batch,
-                    convert_to_numpy=True,
-                    show_progress_bar=False,
-                    device='cpu',  # Force CPU to avoid GPU conflicts
-                    normalize_embeddings=False,  # We'll normalize later with FAISS
-                    batch_size=min(batch_size, 4)  # Extra safety on batch size
-                )
-                embeddings.append(batch_embeddings)
-
-                # Import gc for garbage collection
-                import gc
-                gc.collect()  # Force garbage collection between batches
-
-        except Exception as e:
-            # Log the error and provide a helpful message
-            print(f"Error creating embeddings: {e}")
-            if "cuda" in str(e).lower() or "gpu" in str(e).lower():
-                raise RuntimeError(f"GPU/CUDA error encountered. The model is configured to use CPU only. Error: {e}")
-            elif "memory" in str(e).lower() or "out of memory" in str(e).lower():
-                raise RuntimeError(f"Out of memory while creating embeddings. Try uploading smaller files or fewer files at once: {e}")
-            else:
-                raise RuntimeError(f"Failed to create embeddings: {e}")
-
-        return np.vstack(embeddings) if embeddings else np.array([])
-
-    def build_index(self, chunks: List[Dict[str, Any]], show_progress: bool = True):
-        """Build FAISS index from chunks"""
-        if not HAS_FAISS:
-            raise ImportError("faiss-cpu not installed")
-
-        # Extract texts and build embeddings
-        texts = [chunk['text'] for chunk in chunks]
-
-        if show_progress:
-            print(f"Creating embeddings for {len(texts)} chunks...")
-
-        embeddings = self.create_embeddings(texts)
-
-        # Build FAISS index
-        if show_progress:
-            print("Building FAISS index...")
-
-        # Use IndexFlatIP for inner product (cosine similarity with normalized vectors)
-        self.index = faiss.IndexFlatIP(self.dimension)
-
-        # Normalize embeddings for cosine similarity
-        faiss.normalize_L2(embeddings)
-
-        # Add to index
-        self.index.add(embeddings)
-
-        # Store chunks and maintain mapping
-        self.chunks = {}
-        self.chunk_ids = []
-
-        for chunk in chunks:
-            chunk_id = chunk['chunk_id']
-            self.chunks[chunk_id] = chunk
-            self.chunk_ids.append(chunk_id)
-
-        if show_progress:
-            print(f"Index built with {len(chunks)} chunks")
-
-    def search(self, query: str, top_k: int = 5, score_threshold: float = 0.3) -> List[SearchResult]:
-        """Search for similar chunks"""
-        if not self.index or not self.chunks:
-            return []
-
-        # Create query embedding
-        query_embedding = self.create_embeddings([query])
-
-        # Normalize for cosine similarity
-        faiss.normalize_L2(query_embedding)
-
-        # Search
-        scores, indices = self.index.search(query_embedding, min(top_k, len(self.chunks)))
-
-        # Convert to results
-        results = []
-
-        for score, idx in zip(scores[0], indices[0]):
-            if idx < 0 or score < score_threshold:
-                continue
-
-            chunk_id = self.chunk_ids[idx]
-            chunk = self.chunks[chunk_id]
-
-            result = SearchResult(
-                chunk_id=chunk_id,
-                text=chunk['text'],
-                score=float(score),
-                metadata=chunk.get('metadata', {})
-            )
-            results.append(result)
-
-        return results
-
-    def serialize(self) -> Dict[str, Any]:
-        """Serialize the vector store for deployment"""
-        if not self.index:
-            raise ValueError("No index to serialize")
-
-        # Serialize FAISS index
-        index_bytes = faiss.serialize_index(self.index)
-        index_base64 = base64.b64encode(index_bytes).decode('utf-8')
-
-        return {
-            'index_base64': index_base64,
-            'chunks': self.chunks,
-            'chunk_ids': self.chunk_ids,
-            'dimension': self.dimension,
-            'model_name': self.embedding_model_name
-        }
-
-    @classmethod
-    def deserialize(cls, data: Dict[str, Any]) -> 'VectorStore':
-        """Deserialize a vector store from deployment data"""
-        if not HAS_FAISS:
-            raise ImportError("faiss-cpu not installed")
-
-        store = cls(embedding_model=data['model_name'])
-
-        # Deserialize FAISS index
-        index_bytes = base64.b64decode(data['index_base64'])
-        store.index = faiss.deserialize_index(index_bytes)
-
-        # Restore chunks and mappings
-        store.chunks = data['chunks']
-        store.chunk_ids = data['chunk_ids']
-        store.dimension = data['dimension']
-
-        return store
-
-    def get_stats(self) -> Dict[str, Any]:
-        """Get statistics about the vector store"""
-        return {
-            'total_chunks': len(self.chunks),
-            'index_size': self.index.ntotal if self.index else 0,
-            'dimension': self.dimension,
-            'model': self.embedding_model_name
-        }
-
-
-class LightweightVectorStore:
-    """Lightweight version for deployed spaces without embedding model"""
-
-    def __init__(self, serialized_data: Dict[str, Any]):
-        if not HAS_FAISS:
-            raise ImportError("faiss-cpu not installed")
-
-        # Deserialize FAISS index
-        index_bytes = base64.b64decode(serialized_data['index_base64'])
-        self.index = faiss.deserialize_index(index_bytes)
-
-        # Restore chunks and mappings
-        self.chunks = serialized_data['chunks']
-        self.chunk_ids = serialized_data['chunk_ids']
-        self.dimension = serialized_data['dimension']
-
-        # For query embedding, we'll need to include pre-computed embeddings
-        # or use a lightweight embedding service
-        self.query_embeddings_cache = serialized_data.get('query_embeddings_cache', {})
-
-    def search_with_embedding(self, query_embedding: np.ndarray, top_k: int = 5, score_threshold: float = 0.3) -> List[SearchResult]:
-        """Search using pre-computed query embedding"""
-        if not self.index or not self.chunks:
-            return []
-
-        # Normalize for cosine similarity
-        faiss.normalize_L2(query_embedding)
-
-        # Search
-        scores, indices = self.index.search(query_embedding, min(top_k, len(self.chunks)))
-
-        # Convert to results
-        results = []
-
-        for score, idx in zip(scores[0], indices[0]):
-            if idx < 0 or score < score_threshold:
-                continue
-
-            chunk_id = self.chunk_ids[idx]
-            chunk = self.chunks[chunk_id]
-
-            result = SearchResult(
-                chunk_id=chunk_id,
-                text=chunk['text'],
-                score=float(score),
-                metadata=chunk.get('metadata', {})
-            )
-            results.append(result)
-
-        return results
-
-
-# Utility functions
-def estimate_index_size(num_chunks: int, dimension: int = 384) -> float:
-    """Estimate the size of the index in MB"""
-    # Rough estimation: 4 bytes per float * dimension * num_chunks
-    bytes_size = 4 * dimension * num_chunks
-    # Add overhead for index structure and metadata
-    overhead = 1.2
-    return (bytes_size * overhead) / (1024 * 1024)