milwright committed on
Commit 675fc70 · Parent(s): 814e035

Fix examples formatting and remove RAG functionality


- Fixed examples processing in the generate_zip function to use repr() instead of json.dumps()
- This ensures examples are properly formatted as Python literals for gr.ChatInterface (see the sketch below)
- Updated the theme specification from gr.themes.Default() to the "default" string
- Removed RAG functionality files: rag_tool.py and vector_store.py
- Enhanced support_docs.py with placeholder comments for future image integration
- Updated export_conversation_to_markdown to include configuration metadata
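
A note on the repr() change: json.dumps() produces JSON, not Python, so None/True/False and quoting come out wrong when the serialized list is pasted into generated source. A minimal sketch of the difference (the examples list here is illustrative, not from the codebase):

```python
import json

# A hypothetical examples list of the kind generate_zip embeds in the
# generated app.py for gr.ChatInterface(examples=...)
examples = ["What's DNA?", "Is water polar?", None]

# JSON form: 'null' is not a valid Python literal, and all strings are
# forced to double quotes with backslash escapes.
print(json.dumps(examples))  # ["What's DNA?", "Is water polar?", null]

# repr() form: a valid Python literal that can be interpolated directly
# into generated source code.
print(repr(examples))        # ["What's DNA?", 'Is water polar?', None]
```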

Files changed (4):
  1. app.py +1 -1
  2. rag_tool.py +0 -208
  3. support_docs.py +290 -116
  4. vector_store.py +0 -308
app.py CHANGED
@@ -1603,7 +1603,7 @@ with gr.Blocks(
         border-radius: 6px;
     }
     """,
-    theme=gr.themes.Default(),
+    theme="default",
     head="""
     <style>
     /* Additional head styles to prevent manifest issues */
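
For reference, Gradio accepts either form here: `theme` can be a `gr.themes.Theme` instance or a theme name string, so the two calls below request the same default theme (a minimal sketch):

```python
import gradio as gr

# The string form (what this commit switches to) avoids constructing a
# Theme object at module import time; both are accepted by gr.Blocks.
with gr.Blocks(theme="default") as demo_a:
    gr.Markdown("theme passed as a string")

with gr.Blocks(theme=gr.themes.Default()) as demo_b:
    gr.Markdown("theme passed as a Theme instance")
```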
rag_tool.py DELETED
@@ -1,208 +0,0 @@
-import json
-from typing import List, Dict, Any, Optional, Tuple
-from document_processor import DocumentProcessor, DocumentChunk
-from vector_store import VectorStore, SearchResult
-import os
-import tempfile
-from pathlib import Path
-
-
-class RAGTool:
-    """RAG tool for integrating document search with chat"""
-
-    def __init__(self):
-        self.processor = DocumentProcessor(chunk_size=800, chunk_overlap=100)
-        self.vector_store = VectorStore()
-        self.processed_files = []
-        self.total_chunks = 0
-
-    def process_uploaded_files(self, file_paths: List[str]) -> Dict[str, Any]:
-        """Process uploaded files and build vector index"""
-
-        # Validate files
-        valid_files = []
-        errors = []
-
-        for file_path in file_paths:
-            try:
-                # Check file size (10MB limit)
-                size_mb = os.path.getsize(file_path) / (1024 * 1024)
-                if size_mb > 10:
-                    errors.append({
-                        'file': Path(file_path).name,
-                        'error': f'File too large ({size_mb:.1f}MB). Maximum size is 10MB.'
-                    })
-                    continue
-
-                valid_files.append(file_path)
-
-            except Exception as e:
-                errors.append({
-                    'file': Path(file_path).name,
-                    'error': str(e)
-                })
-
-        if not valid_files:
-            return {
-                'success': False,
-                'message': 'No valid files to process',
-                'errors': errors
-            }
-
-        # Process files
-        all_chunks, summary = self.processor.process_multiple_files(valid_files)
-
-        if not all_chunks:
-            return {
-                'success': False,
-                'message': 'No content extracted from files',
-                'summary': summary
-            }
-
-        # Build vector index
-        chunk_dicts = [chunk.to_dict() for chunk in all_chunks]
-        self.vector_store.build_index(chunk_dicts, show_progress=False)
-
-        # Update stats
-        self.processed_files = summary['files_processed']
-        self.total_chunks = len(all_chunks)
-
-        # Calculate index size
-        index_stats = self.vector_store.get_stats()
-
-        return {
-            'success': True,
-            'message': f'Successfully processed {len(valid_files)} files into {self.total_chunks} chunks',
-            'summary': summary,
-            'index_stats': index_stats,
-            'errors': errors
-        }
-
-    def get_relevant_context(self, query: str, max_chunks: int = 3) -> str:
-        """Get relevant context for a query"""
-        if not self.vector_store.index:
-            return ""
-
-        # Search for relevant chunks
-        results = self.vector_store.search(
-            query=query,
-            top_k=max_chunks,
-            score_threshold=0.3
-        )
-
-        if not results:
-            return ""
-
-        # Format context
-        context_parts = []
-
-        for i, result in enumerate(results, 1):
-            file_name = result.metadata.get('file_name', 'Unknown')
-            context_parts.append(
-                f"[Document: {file_name} - Relevance: {result.score:.2f}]\n{result.text}"
-            )
-
-        return "\n\n".join(context_parts)
-
-    def get_serialized_data(self) -> Dict[str, Any]:
-        """Get serialized data for deployment"""
-        if not self.vector_store.index:
-            return None
-
-        return self.vector_store.serialize()
-
-    def get_deployment_info(self) -> Dict[str, Any]:
-        """Get information for deployment package"""
-        if not self.vector_store.index:
-            return {
-                'enabled': False,
-                'message': 'No documents processed'
-            }
-
-        # Estimate package size increase
-        index_stats = self.vector_store.get_stats()
-        estimated_size_mb = (
-            # Index size estimation
-            (index_stats['total_chunks'] * index_stats['dimension'] * 4) / (1024 * 1024) +
-            # Chunks text size estimation
-            (sum(len(chunk['text']) for chunk in self.vector_store.chunks.values()) / (1024 * 1024))
-        ) * 1.5  # Add overhead for base64 encoding
-
-        return {
-            'enabled': True,
-            'total_files': len(self.processed_files),
-            'total_chunks': self.total_chunks,
-            'estimated_size_mb': round(estimated_size_mb, 2),
-            'files': [f['name'] for f in self.processed_files]
-        }
-
-
-def create_rag_module_for_space(serialized_data: Dict[str, Any]) -> str:
-    """Create a minimal RAG module for the deployed space"""
-
-    return '''# RAG Module for deployed space
-import numpy as np
-import faiss
-import base64
-import json
-
-class RAGContext:
-    def __init__(self, serialized_data):
-        # Deserialize FAISS index
-        index_bytes = base64.b64decode(serialized_data['index_base64'])
-        self.index = faiss.deserialize_index(index_bytes)
-
-        # Restore chunks and mappings
-        self.chunks = serialized_data['chunks']
-        self.chunk_ids = serialized_data['chunk_ids']
-
-    def get_context(self, query_embedding, max_chunks=3):
-        """Get relevant context using pre-computed embedding"""
-        if not self.index:
-            return ""
-
-        # Normalize and search
-        faiss.normalize_L2(query_embedding)
-        scores, indices = self.index.search(query_embedding, max_chunks)
-
-        # Format results
-        context_parts = []
-
-        for score, idx in zip(scores[0], indices[0]):
-            if idx < 0 or score < 0.3:
-                continue
-
-            chunk = self.chunks[self.chunk_ids[idx]]
-            file_name = chunk.get('metadata', {}).get('file_name', 'Document')
-
-            context_parts.append(
-                f"[{file_name} - Relevance: {score:.2f}]\\n{chunk['text']}"
-            )
-
-        return "\\n\\n".join(context_parts) if context_parts else ""
-
-# Initialize RAG context
-RAG_DATA = json.loads(\'\'\'{{rag_data_json}}\'\'\')
-rag_context = RAGContext(RAG_DATA) if RAG_DATA else None
-
-def get_rag_context(query):
-    """Get relevant context for a query"""
-    if not rag_context:
-        return ""
-
-    # In production, you'd compute query embedding here
-    # For now, return empty (would need embedding service)
-    return ""
-'''
-
-
-def format_context_for_prompt(context: str, query: str) -> str:
-    """Format RAG context for inclusion in prompt"""
-    if not context:
-        return ""
-
-    return f"""Relevant context from uploaded documents:
-
-{context}
-
-Please use the above context to help answer the user's question: {query}"""
support_docs.py CHANGED
@@ -1,5 +1,16 @@
 """
 Support documentation module with accordion-style help sections
+
+IMAGE PLACEHOLDERS TO REPLACE:
+1. interface_overview.png - Three-tab interface overview (Getting Started)
+2. configuration_tab.png - Configuration tab screenshot (Space Settings)
+3. system_prompt_interface.png - System prompt and template interface (Assistant Configuration)
+4. preview_tab.png - Preview tab with chat interface (Preview Tab Usage)
+5. example_prompts.png - Example prompts input and display (Example Prompts)
+6. tool_settings_diagram.png - Configuration structure diagram (Tool Settings)
+7. advanced_settings.png - Temperature and token controls (Advanced Settings)
+8. deployment_process.png - Deployment process flow diagram (Deployment Process)
+9. secret.png - HuggingFace secret configuration (already exists)
 """
 
 import gradio as gr
@@ -11,25 +22,39 @@ def create_support_docs():
 
     with gr.Column():
         gr.Markdown("# Support Documentation")
+        gr.Markdown("*Under construction - images coming soon!*")
         gr.Markdown("Complete step-by-step guidance for creating and deploying chat interfaces with HuggingFace Spaces.")
 
         with gr.Accordion("🚀 Getting Started", open=True):
             gr.Markdown("""
             ### Quick Start Guide
+            This guide helps you set up your first chat interface on HuggingFace Spaces using Gradio. Follow these steps to create a functional assistant with URL grounding and example prompts.
+            """)
 
-            **Three-Tab Interface:**
-            1. **Configuration Tab**: Set up your space settings, assistant configuration, and tool integrations
-            2. **Preview Tab**: Test your assistant with real API integration before deployment
-            3. **Support Tab**: Access comprehensive documentation and help (this tab)
-
-            **Workflow Steps:**
-            1. **Configure your Space** in the Configuration tab (space title, description, model selection)
-            2. **Set up Assistant** with system prompt and optional research template
-            3. **Enable Tools** like dynamic URL fetching or URL grounding as needed
-            4. **Preview & Test** using the Preview tab to validate your configuration
-            5. **Generate Package** with the "Generate Deployment Package" button
-            6. **Deploy to HuggingFace** following the included README instructions
+            # TODO: Add interface overview screenshot
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="interface_overview.png",  # Placeholder for interface overview screenshot
+                        label="Three-Tab Interface: Configuration, Preview, and Support",
+                        show_label=True,
+                        interactive=False,
+                        width=600,
+                        height=400,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Workflow Steps:**
+                    1. **Configure** your space settings
+                    2. **Set up** assistant with system prompt
+                    3. **Enable** tools like URL grounding
+                    4. **Preview** & test your configuration
+                    5. **Generate** deployment package
+                    6. **Deploy** to HuggingFace
+                    """)
 
+            gr.Markdown("""
             **Prerequisites:**
             - HuggingFace account (free at huggingface.co)
             - OpenRouter API key (get at openrouter.ai/keys)
@@ -39,58 +64,85 @@ def create_support_docs():
         with gr.Accordion("⚙️ Space Settings", open=False):
             gr.Markdown("""
             ### Space Configuration Fields
+            """)
 
-            **Space Title**
-            - The name that will appear on HuggingFace and in your chat interface
-            - Keep it descriptive but concise (e.g., "Biology Course Assistant")
-
-            **Space Description**
-            - Brief explanation of what your assistant does
-            - Will appear in the HuggingFace Space listing and at the top of your chat
+            # TODO: Add configuration tab screenshot
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="configuration_tab.png",  # Placeholder for configuration tab screenshot
+                        label="Configuration Tab Interface",
+                        show_label=True,
+                        interactive=False,
+                        width=500,
+                        height=400,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Key Configuration Fields:**
+
+                    **Space Title** - Display name for your assistant
+
+                    **Space Description** - Brief explanation of purpose
+
+                    **Model Selection** - Choose from optimized models:
+                    - **Gemma 3 27B** - Open-source, sustainable
+                    - **Gemini 2.0 Flash** - Fast, reliable
+                    - **Claude 3.5 Haiku** - Complex reasoning
+
+                    **API Key Variable** - Default: `OPENROUTER_API_KEY`
+
+                    **Access Code** - Optional student protection
+                    """)
 
-            **Model Selection**
+            gr.Markdown("""
+            **Model Comparison:**
             - **[google/gemma-3-27b-it](https://openrouter.ai/models/google/gemma-3-27b-it)**: Open-source, sustainable option with excellent performance
             - **[google/gemini-2.0-flash-001](https://openrouter.ai/models/google/gemini-2.0-flash-001)**: Fast, reliable, good for general tasks
-            - **[mistralai/mistral-medium](https://openrouter.ai/models/mistralai/mistral-medium)**: Good for technical topics
-            - **[openai/gpt-4o-nano](https://openrouter.ai/models/openai/gpt-4o-nano)**: Balanced performance and cost
            - **[anthropic/claude-3.5-haiku](https://openrouter.ai/models/anthropic/claude-3.5-haiku)**: Great for complex reasoning and analysis
-
-            **API Key Variable Name**
-            - Default: `OPENROUTER_API_KEY`
-            - This is the secret name you'll create in HuggingFace Space settings
-            - Only change if you have specific naming requirements
-
-            **Access Code (Optional)**
-            - Leave empty for public access
-            - Set a code to restrict access to students/specific users
-            - Code is stored securely as an environment variable
             """)
 
-        with gr.Accordion("🤖 Assistant Configuration", open=False):
+        with gr.Accordion("🤖 Space Configuration", open=False):
             gr.Markdown("""
             ### System Prompt Design
+            """)
 
-            The system prompt defines your assistant's personality, knowledge, and behavior.
+            # TODO: Add system prompt interface screenshot
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="system_prompt_interface.png",  # Placeholder for system prompt interface
+                        label="System Prompt Configuration Interface",
+                        show_label=True,
+                        interactive=False,
+                        width=500,
+                        height=300,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Template System:**
+
+                    **Research Template** - Pre-configured for academic use
+                    - MLA citation formatting
+                    - Fact-checking emphasis
+                    - Auto-enables URL fetching
+
+                    **Socratic Template** - Pedagogical questioning
+                    - Constructivist learning approach
+                    - Critical thinking focus
+
+                    **Custom Prompt** - Build your own
+                    """)
 
+            gr.Markdown("""
             **Best Practices:**
             - Be specific about the assistant's role and purpose
             - Include behavioral guidelines and constraints
             - Mention the intended audience (students, researchers, etc.)
             - List key capabilities and tasks
 
-            **Research Template**
-            - Pre-configured for academic research assistance
-            - Includes MLA citation formatting
-            - Emphasizes fact-checking and evidence-based responses
-            - Automatically enables dynamic URL fetching
-
-            **Custom Categories**
-            - Break down your system prompt into structured sections:
-            - **Role and Purpose**: What is the assistant and what does it do?
-            - **Intended Audience**: Who will use this assistant?
-            - **Key Tasks**: What specific capabilities should it have?
-            - **Additional Context**: Extra instructions or constraints
-
             ### Copy-Pasteable System Prompts
 
             **Biology Course Assistant:**
@@ -127,9 +179,36 @@ def create_support_docs():
         with gr.Accordion("🔬 Preview Tab Usage", open=False):
             gr.Markdown("""
             ### Testing Your Assistant Before Deployment
+            """)
 
-            The Preview tab provides a sandbox environment to test your assistant with real API integration.
+            # TODO: Add preview tab screenshot
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="preview_tab.png",  # Placeholder for preview tab screenshot
+                        label="Preview Tab with Active Chat Interface",
+                        show_label=True,
+                        interactive=False,
+                        width=500,
+                        height=400,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Preview Features:**
+
+                    **Real API Integration** - Uses actual OpenRouter API
+
+                    **Configuration Display** - Shows current setup
+
+                    **URL Testing** - Add up to 4 URLs for context
+
+                    **Chat Export** - Download conversation logs
+
+                    **Clear Function** - Reset chat history
+                    """)
 
+            gr.Markdown("""
             **How to Use the Preview:**
             1. **Configure First**: Complete your setup in the Configuration tab
             2. **Click Preview Button**: Use "Preview Deployment Package" to activate the preview
@@ -137,18 +216,9 @@ def create_support_docs():
             4. **Test URL Context**: Add URLs to test grounding functionality
             5. **Export Conversations**: Save chat logs for analysis
 
-            **Preview Features:**
-            - **Real API Integration**: Uses actual OpenRouter API when `OPENROUTER_API_KEY` is set
-            - **Configuration Display**: Shows your current assistant setup
-            - **URL Testing**: Add up to 4 URLs for context testing
-            - **Dynamic URL Management**: Add/remove URL fields as needed
-            - **Chat Export**: Download conversation logs as markdown files
-            - **Clear Function**: Reset chat history for new tests
-
             **Preview Requirements:**
             - Set `OPENROUTER_API_KEY` environment variable for real API testing
             - Without API key: Shows configuration but no actual chat responses
-            - All other features (URL fetching, configuration) work without API key
 
             **Testing Best Practices:**
             - Test different types of queries to validate assistant behavior
@@ -160,20 +230,36 @@ def create_support_docs():
         with gr.Accordion("💬 Example Prompts", open=False):
             gr.Markdown("""
             ### Creating Effective Example Prompts
+            """)
 
-            Example prompts appear as clickable suggestions in your chat interface.
-
-            **Guidelines:**
-            - Write 3-6 clear, specific examples
-            - Show the range of what your assistant can do
-            - Match your intended use cases
-            - Include URLs if your assistant can process them
-
-            **Format:**
-            - One prompt per line
-            - Keep each prompt under 100 characters for better display
-            - Use natural, conversational language
+            # TODO: Add example prompts interface screenshot
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="example_prompts.png",  # Placeholder for example prompts interface
+                        label="Example Prompts Interface and Display",
+                        show_label=True,
+                        interactive=False,
+                        width=500,
+                        height=300,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Guidelines:**
+
+                    **Format** - One prompt per line
+
+                    **Length** - Under 100 characters each
+
+                    **Content** - Show assistant capabilities
+
+                    **Language** - Natural and conversational
+
+                    **URLs** - Include if assistant processes them
+                    """)
 
+            gr.Markdown("""
             **Examples by Use Case:**
 
             **Course Assistant:**
@@ -201,40 +287,42 @@ def create_support_docs():
         with gr.Accordion("🔧 Tool Settings & Configuration", open=False):
             gr.Markdown("""
             ### Configuration Tab Structure
+            """)
 
-            **Main Configuration Fields** (always visible):
-            - **Space Title**: Display name for your HuggingFace Space
-            - **Space Description**: Brief explanation shown in space listing
-            - **Model Selection**: Choose from optimized OpenRouter models
-            - **API Key Variable**: Secret name for HuggingFace Space settings (default: `OPENROUTER_API_KEY`)
-            - **Access Code**: Optional password protection for student access
-
-            **Assistant Configuration Accordion** (open by default):
-            - **System Prompt**: Main field defining assistant behavior and knowledge
-            - **Research Template**: Pre-configured academic research assistant checkbox
-            - **Web Search Integration**: Enable crawl4ai web search capabilities
-            # Document RAG functionality removed
-            - **URL Grounding**: Add up to 4 static URLs for context (dynamic add/remove)
-            - **Example Prompts**: Clickable suggestions for users (one per line)
-            - **Dynamic URL Fetching**: Hidden field (always enabled) for runtime URL processing
-
-            **Advanced Settings Accordion**:
-            - **Temperature**: Response creativity control (0.0-2.0)
-            - **Max Tokens**: Response length limit (50-4096)
-
-            **Action Buttons**:
-            - **Preview Deployment Package**: Activate Preview tab testing
-            - **Generate Deployment Package**: Create downloadable zip file
+            # TODO: Add tool settings diagram
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="tool_settings_diagram.png",  # Placeholder for tool settings structure diagram
+                        label="Configuration Tab Structure and Tool Integration",
+                        show_label=True,
+                        interactive=False,
+                        width=500,
+                        height=400,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Main Configuration Fields:**
+                    - Space Title & Description
+                    - Model Selection
+                    - API Key Variable
+                    - Access Code (optional)
+
+                    **Assistant Configuration:**
+                    - System Prompt
+                    - Template Selection
+                    - URL Grounding (2-4 URLs)
+                    - Example Prompts
+
+                    **Advanced Settings:**
+                    - Temperature (0.0-2.0)
+                    - Max Tokens (50-4096)
+                    """)
 
+            gr.Markdown("""
             ### Tool Integration Options
 
-            **Web Search (crawl4ai)**
-            - Real-time web searching using DuckDuckGo
-            - Advanced content extraction and crawling
-            - Automatically enabled with Research Template
-
-            # Document RAG functionality removed
-
             **URL Grounding (Static Context)**
             - Add 2-4 URLs for consistent context across all responses
             - Content fetched once during generation and cached
@@ -250,20 +338,36 @@ def create_support_docs():
         with gr.Accordion("🎛️ Advanced Settings", open=False):
             gr.Markdown("""
             ### Model Parameters
+            """)
 
-            **Temperature (0.0 - 2.0)**
-            - **0.0-0.3**: Very focused, deterministic responses
-            - **0.4-0.7**: Balanced creativity and consistency (recommended)
-            - **0.8-1.2**: More creative and varied responses
-            - **1.3-2.0**: Highly creative, potentially unpredictable
-
-            **Max Response Tokens (50-4096)**
-            - Controls maximum length of assistant responses
-            - **50-200**: Short, concise answers
-            - **200-500**: Medium responses (recommended for most cases)
-            - **500-1000**: Longer, detailed explanations
-            - **1000+**: Extended analysis and comprehensive responses
+            # TODO: Add advanced settings interface screenshot
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="advanced_settings.png",  # Placeholder for advanced settings interface
+                        label="Advanced Settings: Temperature and Token Controls",
+                        show_label=True,
+                        interactive=False,
+                        width=500,
+                        height=300,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Temperature (0.0 - 2.0)**
+                    - **0.0-0.3**: Very focused, deterministic
+                    - **0.4-0.7**: Balanced (recommended)
+                    - **0.8-1.2**: More creative and varied
+                    - **1.3-2.0**: Highly creative, unpredictable
+
+                    **Max Response Tokens (50-4096)**
+                    - **50-200**: Short, concise answers
+                    - **200-500**: Medium responses (recommended)
+                    - **500-1000**: Longer, detailed explanations
+                    - **1000+**: Extended analysis
+                    """)
 
+            gr.Markdown("""
             **Token Usage Notes:**
             - Tokens include both input (your prompt + context) and output
             - Longer contexts (URLs) use more input tokens
@@ -273,6 +377,37 @@ def create_support_docs():
         with gr.Accordion("🚀 Deployment Process", open=False):
             gr.Markdown("""
             ### Quick Deployment Guide
+            """)
+
+            # TODO: Add deployment process diagram
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Image(
+                        value="deployment_process.png",  # Placeholder for deployment process diagram
+                        label="Complete Deployment Process Flow",
+                        show_label=True,
+                        interactive=False,
+                        width=500,
+                        height=300,
+                        container=False
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("""
+                    **Deployment Steps:**
+
+                    **1. Generate Package** - Download zip file
+
+                    **2. Create Space** - New HuggingFace Space (Gradio SDK)
+
+                    **3. Upload Files** - app.py and requirements.txt
+
+                    **4. Add API Key** - Secret configuration
+
+                    **5. Deploy & Test** - Wait for build, then test
+                    """)
+
+            gr.Markdown("""
+            **Detailed Steps:**
 
             **1. Generate & Upload**
             - Click "Generate Deployment Package" → download zip
@@ -355,8 +490,6 @@ def create_support_docs():
             - Check for typos in the access code
             - Case-sensitive matching
 
-            # Document RAG functionality removed
-
             **URLs not fetching content**
             - Check URLs are publicly accessible
             - Some sites block automated requests
@@ -429,17 +562,58 @@ def create_support_docs():
             - Language practice partners
             """)
 
-def export_conversation_to_markdown(conversation_history):
-    """Export conversation history to markdown format"""
+def export_conversation_to_markdown(conversation_history, config_metadata=None):
+    """Export conversation history to markdown format with configuration metadata"""
     if not conversation_history:
         return "No conversation to export."
 
     markdown_content = f"""# Conversation Export
 Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 
----
+"""
+
+    # Add configuration metadata if provided
+    if config_metadata:
+        markdown_content += """## Configuration Information
 
 """
+
+        # Add basic configuration details
+        if config_metadata.get('name'):
+            markdown_content += f"**Assistant Name:** {config_metadata['name']}\n"
+        if config_metadata.get('description'):
+            markdown_content += f"**Description:** {config_metadata['description']}\n"
+        if config_metadata.get('model'):
+            markdown_content += f"**Model:** {config_metadata['model']}\n"
+        if config_metadata.get('temperature'):
+            markdown_content += f"**Temperature:** {config_metadata['temperature']}\n"
+        if config_metadata.get('max_tokens'):
+            markdown_content += f"**Max Tokens:** {config_metadata['max_tokens']}\n"
+
+        # Add URL grounding information
+        grounding_urls = []
+        for i in range(1, 5):
+            url = config_metadata.get(f'url{i}')
+            if url and url.strip():
+                grounding_urls.append(url.strip())
+
+        if grounding_urls:
+            markdown_content += f"\n**URL Grounding ({len(grounding_urls)} URLs):**\n"
+            for i, url in enumerate(grounding_urls, 1):
+                markdown_content += f"- URL {i}: {url}\n"
+
+        # Add feature flags
+        if config_metadata.get('enable_dynamic_urls'):
+            markdown_content += f"\n**Dynamic URL Fetching:** Enabled\n"
+
+        # Add system prompt
+        if config_metadata.get('system_prompt'):
+            system_prompt = config_metadata['system_prompt']
+            markdown_content += f"\n**System Prompt:**\n```\n{system_prompt}\n```\n"
+
+        markdown_content += "\n---\n\n"
+    else:
+        markdown_content += "---\n\n"
 
     for i, message in enumerate(conversation_history):
         if isinstance(message, dict):
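
A quick usage sketch of the updated export function; the history and config values below are illustrative, but the dict keys (`name`, `model`, `temperature`, `max_tokens`, `url1`..`url4`, `enable_dynamic_urls`, `system_prompt`) match what the function reads:

```python
history = [
    {"role": "user", "content": "What is osmosis?"},
    {"role": "assistant", "content": "Osmosis is the diffusion of water across a membrane..."},
]

config = {
    "name": "Biology Course Assistant",
    "model": "google/gemma-3-27b-it",
    "temperature": 0.7,
    "max_tokens": 500,
    "url1": "https://example.edu/syllabus",  # hypothetical grounding URL
    "system_prompt": "You are a biology course assistant...",
}

# With config_metadata, the export opens with a "## Configuration
# Information" section before the transcript; without it, only the
# conversation itself is written.
markdown = export_conversation_to_markdown(history, config_metadata=config)
print(markdown)
```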
vector_store.py DELETED
@@ -1,308 +0,0 @@
-import numpy as np
-import pickle
-import base64
-from typing import List, Dict, Any, Tuple, Optional
-import json
-from dataclasses import dataclass
-
-try:
-    from sentence_transformers import SentenceTransformer
-    HAS_SENTENCE_TRANSFORMERS = True
-except ImportError:
-    HAS_SENTENCE_TRANSFORMERS = False
-
-try:
-    import faiss
-    HAS_FAISS = True
-except ImportError:
-    HAS_FAISS = False
-
-
-@dataclass
-class SearchResult:
-    chunk_id: str
-    text: str
-    score: float
-    metadata: Dict[str, Any]
-
-
-class VectorStore:
-    def __init__(self, embedding_model: str = "all-MiniLM-L6-v2"):
-        self.embedding_model_name = embedding_model
-        self.embedding_model = None
-        self.index = None
-        self.chunks = {}  # chunk_id -> chunk data
-        self.chunk_ids = []  # Ordered list for FAISS index mapping
-        self.dimension = 384  # Default for all-MiniLM-L6-v2
-
-        if HAS_SENTENCE_TRANSFORMERS:
-            self._initialize_model()
-
-    def _initialize_model(self):
-        """Initialize the embedding model"""
-        if not HAS_SENTENCE_TRANSFORMERS:
-            raise ImportError("sentence-transformers not installed")
-
-        try:
-            print(f"Loading embedding model: {self.embedding_model_name}")
-            print("This may take a moment on first run as the model downloads...")
-
-            # Set environment variables to prevent multiprocessing issues
-            import os
-            os.environ['TOKENIZERS_PARALLELISM'] = 'false'
-            os.environ['OMP_NUM_THREADS'] = '1'
-            os.environ['MKL_NUM_THREADS'] = '1'
-
-            # Initialize with specific settings to avoid multiprocessing issues
-            self.embedding_model = SentenceTransformer(
-                self.embedding_model_name,
-                device='cpu',  # Force CPU to avoid GPU/multiprocessing conflicts
-                cache_folder=None,  # Use default cache
-                # Additional parameters to reduce memory usage
-                use_auth_token=False,
-                trust_remote_code=False  # Security best practice
-            )
-
-            # Disable multiprocessing for stability in web apps
-            if hasattr(self.embedding_model, 'pool'):
-                self.embedding_model.pool = None
-
-            # Additional stability measures for Gradio environment
-            if hasattr(self.embedding_model, '_modules'):
-                for module in self.embedding_model._modules.values():
-                    if hasattr(module, 'num_workers'):
-                        module.num_workers = 0
-
-            # Update dimension based on model
-            self.dimension = self.embedding_model.get_sentence_embedding_dimension()
-            print(f"Model loaded successfully, dimension: {self.dimension}")
-        except Exception as e:
-            print(f"Failed to initialize embedding model: {e}")
-            # Provide more specific error messages
-            if "connection" in str(e).lower() or "timeout" in str(e).lower():
-                raise RuntimeError(f"Network error downloading model '{self.embedding_model_name}'. "
-                                   f"Please check your internet connection and try again: {e}")
-            elif "memory" in str(e).lower() or "out of memory" in str(e).lower():
-                raise RuntimeError(f"Insufficient memory to load model '{self.embedding_model_name}'. "
-                                   f"Try using a smaller model or increase available memory: {e}")
-            else:
-                raise RuntimeError(f"Could not load embedding model '{self.embedding_model_name}': {e}")
-
-    def create_embeddings(self, texts: List[str], batch_size: int = 8) -> np.ndarray:
-        """Create embeddings for a list of texts"""
-        if not self.embedding_model:
-            self._initialize_model()
-
-        # Use smaller batch size for stability
-        embeddings = []
-
-        try:
-            print(f"Creating embeddings for {len(texts)} text chunks...")
-            for i in range(0, len(texts), batch_size):
-                batch = texts[i:i + batch_size]
-                print(f"Processing batch {i//batch_size + 1}/{(len(texts) + batch_size - 1)//batch_size}")
-
-                batch_embeddings = self.embedding_model.encode(
-                    batch,
-                    convert_to_numpy=True,
-                    show_progress_bar=False,
-                    device='cpu',  # Force CPU to avoid GPU conflicts
-                    normalize_embeddings=False,  # We'll normalize later with FAISS
-                    batch_size=min(batch_size, 4)  # Extra safety on batch size
-                )
-                embeddings.append(batch_embeddings)
-
-                # Import gc for garbage collection
-                import gc
-                gc.collect()  # Force garbage collection between batches
-
-        except Exception as e:
-            # Log the error and provide a helpful message
-            print(f"Error creating embeddings: {e}")
-            if "cuda" in str(e).lower() or "gpu" in str(e).lower():
-                raise RuntimeError(f"GPU/CUDA error encountered. The model is configured to use CPU only. Error: {e}")
-            elif "memory" in str(e).lower() or "out of memory" in str(e).lower():
-                raise RuntimeError(f"Out of memory while creating embeddings. Try uploading smaller files or fewer files at once: {e}")
-            else:
-                raise RuntimeError(f"Failed to create embeddings: {e}")
-
-        return np.vstack(embeddings) if embeddings else np.array([])
-
-    def build_index(self, chunks: List[Dict[str, Any]], show_progress: bool = True):
-        """Build FAISS index from chunks"""
-        if not HAS_FAISS:
-            raise ImportError("faiss-cpu not installed")
-
-        # Extract texts and build embeddings
-        texts = [chunk['text'] for chunk in chunks]
-
-        if show_progress:
-            print(f"Creating embeddings for {len(texts)} chunks...")
-
-        embeddings = self.create_embeddings(texts)
-
-        # Build FAISS index
-        if show_progress:
-            print("Building FAISS index...")
-
-        # Use IndexFlatIP for inner product (cosine similarity with normalized vectors)
-        self.index = faiss.IndexFlatIP(self.dimension)
-
-        # Normalize embeddings for cosine similarity
-        faiss.normalize_L2(embeddings)
-
-        # Add to index
-        self.index.add(embeddings)
-
-        # Store chunks and maintain mapping
-        self.chunks = {}
-        self.chunk_ids = []
-
-        for chunk in chunks:
-            chunk_id = chunk['chunk_id']
-            self.chunks[chunk_id] = chunk
-            self.chunk_ids.append(chunk_id)
-
-        if show_progress:
-            print(f"Index built with {len(chunks)} chunks")
-
-    def search(self, query: str, top_k: int = 5, score_threshold: float = 0.3) -> List[SearchResult]:
-        """Search for similar chunks"""
-        if not self.index or not self.chunks:
-            return []
-
-        # Create query embedding
-        query_embedding = self.create_embeddings([query])
-
-        # Normalize for cosine similarity
-        faiss.normalize_L2(query_embedding)
-
-        # Search
-        scores, indices = self.index.search(query_embedding, min(top_k, len(self.chunks)))
-
-        # Convert to results
-        results = []
-
-        for score, idx in zip(scores[0], indices[0]):
-            if idx < 0 or score < score_threshold:
-                continue
-
-            chunk_id = self.chunk_ids[idx]
-            chunk = self.chunks[chunk_id]
-
-            result = SearchResult(
-                chunk_id=chunk_id,
-                text=chunk['text'],
-                score=float(score),
-                metadata=chunk.get('metadata', {})
-            )
-            results.append(result)
-
-        return results
-
-    def serialize(self) -> Dict[str, Any]:
-        """Serialize the vector store for deployment"""
-        if not self.index:
-            raise ValueError("No index to serialize")
-
-        # Serialize FAISS index
-        index_bytes = faiss.serialize_index(self.index)
-        index_base64 = base64.b64encode(index_bytes).decode('utf-8')
-
-        return {
-            'index_base64': index_base64,
-            'chunks': self.chunks,
-            'chunk_ids': self.chunk_ids,
-            'dimension': self.dimension,
-            'model_name': self.embedding_model_name
-        }
-
-    @classmethod
-    def deserialize(cls, data: Dict[str, Any]) -> 'VectorStore':
-        """Deserialize a vector store from deployment data"""
-        if not HAS_FAISS:
-            raise ImportError("faiss-cpu not installed")
-
-        store = cls(embedding_model=data['model_name'])
-
-        # Deserialize FAISS index
-        index_bytes = base64.b64decode(data['index_base64'])
-        store.index = faiss.deserialize_index(index_bytes)
-
-        # Restore chunks and mappings
-        store.chunks = data['chunks']
-        store.chunk_ids = data['chunk_ids']
-        store.dimension = data['dimension']
-
-        return store
-
-    def get_stats(self) -> Dict[str, Any]:
-        """Get statistics about the vector store"""
-        return {
-            'total_chunks': len(self.chunks),
-            'index_size': self.index.ntotal if self.index else 0,
-            'dimension': self.dimension,
-            'model': self.embedding_model_name
-        }
-
-
-class LightweightVectorStore:
-    """Lightweight version for deployed spaces without embedding model"""
-
-    def __init__(self, serialized_data: Dict[str, Any]):
-        if not HAS_FAISS:
-            raise ImportError("faiss-cpu not installed")
-
-        # Deserialize FAISS index
-        index_bytes = base64.b64decode(serialized_data['index_base64'])
-        self.index = faiss.deserialize_index(index_bytes)
-
-        # Restore chunks and mappings
-        self.chunks = serialized_data['chunks']
-        self.chunk_ids = serialized_data['chunk_ids']
-        self.dimension = serialized_data['dimension']
-
-        # For query embedding, we'll need to include pre-computed embeddings
-        # or use a lightweight embedding service
-        self.query_embeddings_cache = serialized_data.get('query_embeddings_cache', {})
-
-    def search_with_embedding(self, query_embedding: np.ndarray, top_k: int = 5, score_threshold: float = 0.3) -> List[SearchResult]:
-        """Search using pre-computed query embedding"""
-        if not self.index or not self.chunks:
-            return []
-
-        # Normalize for cosine similarity
-        faiss.normalize_L2(query_embedding)
-
-        # Search
-        scores, indices = self.index.search(query_embedding, min(top_k, len(self.chunks)))
-
-        # Convert to results
-        results = []
-
-        for score, idx in zip(scores[0], indices[0]):
-            if idx < 0 or score < score_threshold:
-                continue
-
-            chunk_id = self.chunk_ids[idx]
-            chunk = self.chunks[chunk_id]
-
-            result = SearchResult(
-                chunk_id=chunk_id,
-                text=chunk['text'],
-                score=float(score),
-                metadata=chunk.get('metadata', {})
-            )
-            results.append(result)
-
-        return results
-
-
-# Utility functions
-def estimate_index_size(num_chunks: int, dimension: int = 384) -> float:
-    """Estimate the size of the index in MB"""
-    # Rough estimation: 4 bytes per float * dimension * num_chunks
-    bytes_size = 4 * dimension * num_chunks
-    # Add overhead for index structure and metadata
-    overhead = 1.2
-    return (bytes_size * overhead) / (1024 * 1024)