milwright committed on
Commit
7404ace
·
verified ·
1 Parent(s): ede304c

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. app.py +574 -0
  3. config.json +5 -6
  4. requirements.txt +2 -4
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: search-aid
3
  emoji: 🤖
4
  colorFrom: blue
5
  colorTo: red
@@ -9,7 +9,7 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- # search-aid
13
 
14
 
15
 
@@ -53,7 +53,7 @@ pinned: false
53
 
54
  ## Configuration
55
 
56
- - **Model**: google/gemini-2.0-flash-001
57
  - **Temperature**: 0.7
58
  - **Max Tokens**: 1500
59
  - **API Key Variable**: OPENROUTER_API_KEY
 
1
  ---
2
+ title: Britannica Wiki Search
3
  emoji: 🤖
4
  colorFrom: blue
5
  colorTo: red
 
9
  pinned: false
10
  ---
11
 
12
+ # Britannica Wiki Search
13
 
14
 
15
 
 
53
 
54
  ## Configuration
55
 
56
+ - **Model**: openai/gpt-4o-mini-search-preview
57
  - **Temperature**: 0.7
58
  - **Max Tokens**: 1500
59
  - **API Key Variable**: OPENROUTER_API_KEY
app.py ADDED
@@ -0,0 +1,574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tempfile
3
+ import os
4
+ import requests
5
+ import json
6
+ import re
7
+ from bs4 import BeautifulSoup
8
+ from datetime import datetime
9
+ import urllib.parse
10
+
11
+
12
# Configuration
SPACE_NAME = "Britannica Wiki Search"
SPACE_DESCRIPTION = ""
SYSTEM_PROMPT = """You are a research aid specializing in academic literature search and analysis. Your expertise spans discovering peer-reviewed sources, assessing research methodologies, synthesizing findings across studies, and delivering properly formatted citations. When responding, anchor claims in specific sources from provided URL contexts, differentiate between direct evidence and interpretive analysis, and note any limitations or contradictory results. Employ clear, accessible language that demystifies complex research, and propose connected research directions when appropriate. Your purpose is to serve as an informed research tool supporting users through initial concept development, exploratory investigation, information collection, and source compilation."""
MODEL = "openai/gpt-4o-mini-search-preview"
GROUNDING_URLS = ["https://www.wikipedia.org/", "https://www.britannica.com/"]
# The access code is read from the environment so it never lives in the
# repository; an empty value leaves the Space publicly accessible.
ACCESS_CODE = os.environ.get("SPACE_ACCESS_CODE", "")
ENABLE_DYNAMIC_URLS = True
ENABLE_VECTOR_RAG = False
RAG_DATA = None

# Name of the environment variable (Space secret) that holds the OpenRouter key.
# Diagnostic messages elsewhere in this module interpolate `api_key_var`, so it
# has to exist at module level or those messages raise NameError.
api_key_var = "OPENROUTER_API_KEY"

# Normalise the key: surrounding whitespace is stripped, and an unset or
# blank value becomes None so callers can use a plain truthiness check.
API_KEY = (os.environ.get(api_key_var) or "").strip() or None
30
+
31
+ # API Key validation and logging
32
def validate_api_key():
    """Validate the module-level ``API_KEY`` and print a diagnostic report.

    Returns:
        bool: ``False`` when no key is configured. ``True`` otherwise, including
        when the key lacks the ``sk-or-`` prefix (a warning is printed but the
        key is still used).
    """
    # Kept local so the diagnostics below never depend on a module-level name
    # being defined; an unresolved name here would raise NameError on every path.
    api_key_var = "OPENROUTER_API_KEY"

    if not API_KEY:
        print("⚠️ API KEY CONFIGURATION ERROR:")
        print(f" Variable name: {api_key_var}")
        print(" Status: Not set or empty")
        print(f" Action needed: Set '{api_key_var}' in HuggingFace Space secrets")
        print(" Expected format: sk-or-xxxxxxxxxx")
        return False

    if not API_KEY.startswith('sk-or-'):
        # Only ever log a truncated prefix; a key too short to truncate is
        # hidden entirely instead of being written to the logs in full.
        preview = f"{API_KEY[:10]}..." if len(API_KEY) > 10 else "(hidden)"
        print("⚠️ API KEY FORMAT WARNING:")
        print(f" Variable name: {api_key_var}")
        print(f" Current value: {preview}")
        print(" Expected format: sk-or-xxxxxxxxxx")
        print(" Note: OpenRouter keys should start with 'sk-or-'")
        return True  # Still try to use it

    print("βœ… API Key configured successfully")
    print(f" Variable: {api_key_var}")
    print(" Format: Valid OpenRouter key")
    return True
53
+
54
# Validate on startup
# API_KEY_VALID is read later by get_configuration_status() and by the UI to
# decide whether the configuration accordion starts open.
try:
    API_KEY_VALID = validate_api_key()
except NameError:
    # During template generation, API_KEY might not be defined yet
    # NOTE(review): this also swallows a NameError raised by any other undefined
    # name inside validate_api_key(), silently marking the key as invalid.
    API_KEY_VALID = False
60
+
61
def validate_url_domain(url):
    """Return whether *url* has a network location that contains a dot.

    This is a structural check only: nothing is resolved or contacted.

    Args:
        url: Candidate URL string.

    Returns:
        bool: ``True`` when the parsed URL has a non-empty netloc containing
        ``"."``; ``False`` otherwise, including when the value cannot be parsed.
    """
    from urllib.parse import urlparse

    try:
        netloc = urlparse(url).netloc
        return bool(netloc and '.' in netloc)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed URL (e.g. unbalanced IPv6 bracket).
        # TypeError / AttributeError: non-string input.
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        return False
72
+
73
def fetch_url_content(url):
    """Enhanced URL content fetching with improved compatibility and error handling

    Downloads *url*, strips non-content HTML elements, and returns the
    remaining text.

    Args:
        url: Address to download; it is checked with validate_url_domain() first.

    Returns:
        str: The cleaned page text, cut to at most about 4000 characters, or a
        human-readable error string. Failures are reported through the return
        value; every exception raised inside the fetch is caught here.
    """
    if not validate_url_domain(url):
        return f"Invalid URL format: {url}"

    try:
        # Enhanced headers for better compatibility
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive'
        }

        response = requests.get(url, timeout=15, headers=headers)
        # Turn 4xx/5xx responses into RequestException so they are reported below.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Enhanced content cleaning
        for element in soup(["script", "style", "nav", "header", "footer", "aside", "form", "button"]):
            element.decompose()

        # Extract main content preferentially
        # Falls back from <main> to <article>, then to the first <div> whose class
        # contains "content", and finally to the whole document.
        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=lambda x: bool(x and 'content' in x.lower())) or soup
        text = main_content.get_text()

        # Enhanced text cleaning
        # NOTE(review): as written this splits on a single space, so the
        # `len(chunk) > 2` filter below drops every word of one or two characters.
        # Confirm whether a wider separator (e.g. two spaces) was intended.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = ' '.join(chunk for chunk in chunks if chunk and len(chunk) > 2)

        # Smart truncation - try to end at sentence boundaries
        if len(text) > 4000:
            truncated = text[:4000]
            last_period = truncated.rfind('.')
            if last_period > 3000:  # If we can find a reasonable sentence break
                text = truncated[:last_period + 1]
            else:
                text = truncated + "..."

        return text if text.strip() else "No readable content found at this URL"

    except requests.exceptions.Timeout:
        return f"Timeout error fetching {url} (15s limit exceeded)"
    except requests.exceptions.RequestException as e:
        return f"Error fetching {url}: {str(e)}"
    except Exception as e:
        # Catch-all for parsing/cleaning failures so callers always get a string.
        return f"Error processing content from {url}: {str(e)}"
122
+
123
def extract_urls_from_text(text):
    """Return the http(s) URLs found in *text*, in order of appearance.

    Each match has trailing sentence punctuation (``.,!?;:``) removed and is
    kept only if it still contains a dot and is longer than ten characters.
    """
    import re

    url_pattern = r'https?://[^\s<>"{}|\\^`\[\]"]+'
    # Trim punctuation that the greedy pattern picks up at the end of a sentence.
    candidates = (raw.rstrip('.,!?;:') for raw in re.findall(url_pattern, text))
    return [
        candidate
        for candidate in candidates
        if '.' in candidate and len(candidate) > 10
    ]
139
+
140
# Global cache for URL content to avoid re-crawling in generated spaces
_url_content_cache = {}


def get_grounding_context():
    """Build the grounding text from ``GROUNDING_URLS``, memoised per URL set.

    Returns:
        str: One "Context from URL N (...)" section per non-blank URL, wrapped
        in blank lines, or an empty string when no URLs are configured. The
        result is stored in ``_url_content_cache`` keyed by the sorted URLs.
    """
    if not GROUNDING_URLS:
        return ""

    # The key ignores order and blank entries.
    cache_key = tuple(sorted(u for u in GROUNDING_URLS if u and u.strip()))
    try:
        return _url_content_cache[cache_key]
    except KeyError:
        pass

    sections = [
        f"Context from URL {position} ({url}):\n{fetch_url_content(url.strip())}"
        for position, url in enumerate(GROUNDING_URLS, 1)
        if url.strip()
    ]
    result = ("\n\n" + "\n\n".join(sections) + "\n\n") if sections else ""

    _url_content_cache[cache_key] = result
    return result
169
+
170
def export_conversation_to_markdown(conversation_history):
    """Render a chat history as a Markdown document.

    Accepts both message dicts (``{"role": ..., "content": ...}``) and legacy
    ``[user, assistant]`` pairs. Each user message starts a new numbered
    exchange; the assistant reply carries the same number.

    Returns:
        str: The Markdown text, or ``"No conversation to export."`` when the
        history is empty.
    """
    if not conversation_history:
        return "No conversation to export."

    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    pieces = [f"# Conversation Export\nGenerated on: {timestamp}\n\n---\n\n"]

    exchange_no = 0
    for entry in conversation_history:
        if isinstance(entry, dict):
            speaker = entry.get('role', 'unknown')
            body = entry.get('content', '')
            if speaker == 'user':
                exchange_no += 1
                pieces.append(f"## User Message {exchange_no}\n\n{body}\n\n")
            elif speaker == 'assistant':
                pieces.append(f"## Assistant Response {exchange_no}\n\n{body}\n\n---\n\n")
            continue

        if isinstance(entry, (list, tuple)) and len(entry) >= 2:
            # Handle legacy tuple format: ["user msg", "assistant msg"]
            exchange_no += 1
            question, answer = entry[0], entry[1]
            if question:
                pieces.append(f"## User Message {exchange_no}\n\n{question}\n\n")
            if answer:
                pieces.append(f"## Assistant Response {exchange_no}\n\n{answer}\n\n---\n\n")

    return "".join(pieces)
203
+
204
# Initialize RAG context if enabled
# The provider stays None unless RAG is switched on, data is present, and
# initialisation succeeds.
rag_context_provider = None
if ENABLE_VECTOR_RAG and RAG_DATA:
    try:
        import faiss
        import numpy as np
        import base64

        class SimpleRAGContext:
            """Holds a deserialised FAISS index plus its text chunks."""

            def __init__(self, rag_data):
                # Deserialize FAISS index
                raw_index = base64.b64decode(rag_data['index_base64'])
                self.index = faiss.deserialize_index(raw_index)

                # Restore chunks and mappings
                self.chunks = rag_data['chunks']
                self.chunk_ids = rag_data['chunk_ids']

            def get_context(self, query, max_chunks=3):
                """Get relevant context - simplified version"""
                # Placeholder: no query embedding or similarity search is
                # performed; a fixed marker string is returned.
                return "\n\n[RAG context would be retrieved here based on similarity search]\n\n"

        rag_context_provider = SimpleRAGContext(RAG_DATA)
    except Exception as e:
        print(f"Failed to initialize RAG: {e}")
233
+
234
def generate_response(message, history):
    """Generate response using OpenRouter API

    Builds a system prompt from ``SYSTEM_PROMPT`` plus grounding/RAG/dynamic-URL
    context, replays the conversation history, and posts the request to the
    OpenRouter chat-completions endpoint.

    Args:
        message: The user's current message text.
        history: Prior conversation, either message dicts
            (``{"role": ..., "content": ...}``) or legacy ``[user, assistant]``
            pairs. ``None`` is treated as an empty history.

    Returns:
        str: The model's reply, or a Markdown-formatted error message. Request
        failures are reported through the return value rather than raised.
    """
    # Kept local so the user-facing messages below never depend on a
    # module-level name being defined.
    api_key_var = "OPENROUTER_API_KEY"

    # Enhanced API key validation with helpful messages
    if not API_KEY:
        error_msg = (
            "πŸ”‘ **API Key Required**\n\n"
            "Please configure your OpenRouter API key:\n"
            "1. Go to Settings (βš™οΈ) in your HuggingFace Space\n"
            "2. Click 'Variables and secrets'\n"
            f"3. Add secret: **{api_key_var}**\n"
            "4. Value: Your OpenRouter API key (starts with `sk-or-`)\n\n"
            "Get your API key at: https://openrouter.ai/keys"
        )
        print(f"❌ API request failed: No API key configured for {api_key_var}")
        return error_msg

    # Get grounding context
    grounding_context = get_grounding_context()

    # Add RAG context if available
    if ENABLE_VECTOR_RAG and rag_context_provider:
        rag_context = rag_context_provider.get_context(message)
        if rag_context:
            grounding_context += rag_context

    # If dynamic URLs are enabled, check message for URLs to fetch
    if ENABLE_DYNAMIC_URLS:
        urls_in_message = extract_urls_from_text(message)
        if urls_in_message:
            # Fetch content from URLs mentioned in the message
            dynamic_context_parts = [
                f"\n\nDynamic context from {url}:\n{fetch_url_content(url)}"
                for url in urls_in_message[:3]  # Limit to 3 URLs per message
            ]
            if dynamic_context_parts:
                grounding_context += "\n".join(dynamic_context_parts)

    # Build enhanced system prompt with grounding context
    enhanced_system_prompt = SYSTEM_PROMPT + grounding_context

    # Build messages array for the API
    messages = [{"role": "system", "content": enhanced_system_prompt}]

    # Add conversation history - handle both modern messages format and legacy tuples
    for chat in history or []:
        if isinstance(chat, dict):
            # Forward only the keys the chat-completions API defines; any other
            # keys the UI attaches to a history entry are not part of the request
            # schema.
            if "role" in chat:
                messages.append({"role": chat["role"], "content": chat.get("content", "")})
        elif isinstance(chat, (list, tuple)) and len(chat) >= 2:
            # Legacy format: ["user msg", "assistant msg"] or ("user msg", "assistant msg")
            user_msg, assistant_msg = chat[0], chat[1]
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Make API request with enhanced error handling
    try:
        print("πŸ”„ Making API request to OpenRouter...")
        print(f" Model: {MODEL}")
        print(f" Messages: {len(messages)} in conversation")

        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://huggingface.co",  # Required by some providers
                "X-Title": "HuggingFace Space"  # Helpful for tracking
            },
            json={
                "model": MODEL,
                "messages": messages,
                "temperature": 0.7,
                "max_tokens": 1500
            },
            timeout=30
        )

        print(f"πŸ“‘ API Response: {response.status_code}")

        if response.status_code == 200:
            try:
                result = response.json()

                # Enhanced validation of API response structure
                if 'choices' not in result or not result['choices']:
                    print(f"⚠️ API response missing choices: {result}")
                    return "API Error: No response choices available"
                if 'message' not in result['choices'][0]:
                    print(f"⚠️ API response missing message: {result}")
                    return "API Error: No message in response"
                if 'content' not in result['choices'][0]['message']:
                    print(f"⚠️ API response missing content: {result}")
                    return "API Error: No content in message"

                content = result['choices'][0]['message']['content']

                # Check for empty content
                if not content or content.strip() == "":
                    print("⚠️ API returned empty content")
                    return "API Error: Empty response content"

                print("βœ… API request successful")
                return content

            except (KeyError, IndexError, json.JSONDecodeError) as e:
                print(f"❌ Failed to parse API response: {str(e)}")
                return f"API Error: Failed to parse response - {str(e)}"

        if response.status_code == 401:
            error_msg = (
                "πŸ” **Authentication Error**\n\n"
                "Your API key appears to be invalid or expired.\n\n"
                "**Troubleshooting:**\n"
                f"1. Check that your **{api_key_var}** secret is set correctly\n"
                "2. Verify your API key at: https://openrouter.ai/keys\n"
                "3. Ensure your key starts with `sk-or-`\n"
                "4. Check that you have credits on your OpenRouter account"
            )
            print(f"❌ API authentication failed: {response.status_code} - {response.text[:200]}")
            return error_msg

        if response.status_code == 429:
            error_msg = (
                "⏱️ **Rate Limit Exceeded**\n\n"
                "Too many requests. Please wait a moment and try again.\n\n"
                "**Troubleshooting:**\n"
                "1. Wait 30-60 seconds before trying again\n"
                "2. Check your OpenRouter usage limits\n"
                "3. Consider upgrading your OpenRouter plan"
            )
            print(f"❌ Rate limit exceeded: {response.status_code}")
            return error_msg

        if response.status_code == 400:
            try:
                error_data = response.json()
                error_message = error_data.get('error', {}).get('message', 'Unknown error')
            except (ValueError, AttributeError):
                # ValueError: body is not JSON. AttributeError: JSON is not the
                # expected {"error": {"message": ...}} shape.
                error_message = response.text

            error_msg = (
                "⚠️ **Request Error**\n\n"
                "The API request was invalid:\n"
                f"`{error_message}`\n\n"
            )
            if "model" in error_message.lower():
                error_msg += f"**Model Issue:** The model `{MODEL}` may not be available.\n"
                error_msg += "Try switching to a different model in your Space configuration."
            print(f"❌ Bad request: {response.status_code} - {error_message}")
            return error_msg

        error_msg = (
            f"🚫 **API Error {response.status_code}**\n\n"
            "An unexpected error occurred. Please try again.\n\n"
            "If this persists, check:\n"
            "1. OpenRouter service status\n"
            "2. Your API key and credits\n"
            "3. The model availability"
        )
        print(f"❌ API error: {response.status_code} - {response.text[:200]}")
        return error_msg

    except requests.exceptions.Timeout:
        error_msg = (
            "⏰ **Request Timeout**\n\n"
            "The API request took too long (30s limit).\n\n"
            "**Troubleshooting:**\n"
            "1. Try again with a shorter message\n"
            "2. Check your internet connection\n"
            "3. Try a different model"
        )
        print("❌ Request timeout after 30 seconds")
        return error_msg
    except requests.exceptions.ConnectionError:
        error_msg = (
            "🌐 **Connection Error**\n\n"
            "Could not connect to OpenRouter API.\n\n"
            "**Troubleshooting:**\n"
            "1. Check your internet connection\n"
            "2. Check OpenRouter service status\n"
            "3. Try again in a few moments"
        )
        print("❌ Connection error to OpenRouter API")
        return error_msg
    except Exception as e:
        # Top-level boundary for the chat handler: report instead of crashing the UI.
        error_msg = (
            "❌ **Unexpected Error**\n\n"
            "An unexpected error occurred:\n"
            f"`{str(e)}`\n\n"
            "Please try again or contact support if this persists."
        )
        print(f"❌ Unexpected error: {str(e)}")
        return error_msg
414
+
415
# Access code verification
access_granted = gr.State(False)
_access_granted_global = False  # Global fallback


def verify_access_code(code):
    """Check *code* against ``ACCESS_CODE`` and return UI updates.

    Returns:
        tuple: Three ``gr.update`` objects, in the order the callers wire them:
        the error message, the chat section, and the access-granted state.
        Access is granted when no access code is configured or when *code*
        matches it exactly.
    """
    # NOTE(review): the flag below is a module-level global, so it looks like one
    # successful entry unlocks the chat for every session of this process, and a
    # failed entry locks it again - confirm whether per-session state was intended.
    global _access_granted_global

    accepted = (not ACCESS_CODE) or code == ACCESS_CODE
    _access_granted_global = accepted

    if accepted:
        return gr.update(visible=False), gr.update(visible=True), gr.update(value=True)

    return (
        gr.update(visible=True, value="❌ Incorrect access code. Please try again."),
        gr.update(visible=False),
        gr.update(value=False),
    )
432
+
433
def protected_generate_response(message, history):
    """Answer via generate_response() unless the access gate is still closed.

    When an access code is configured and has not been accepted yet, a prompt
    asking for the code is returned instead of a model reply.
    """
    # The gate only applies when an access code is configured.
    gate_closed = bool(ACCESS_CODE) and not _access_granted_global
    if gate_closed:
        return "Please enter the access code to continue."
    return generate_response(message, history)
439
+
440
# Global variable to store chat history for export
chat_history_store = []


def store_and_generate_response(message, history):
    """Generate a reply and mirror the conversation into ``chat_history_store``.

    The store is first replaced with a copy of *history*, then the new user
    message and the generated reply are appended so the export button can see
    the latest exchange.

    Returns:
        str: The reply produced by protected_generate_response().
    """
    global chat_history_store

    # Snapshot before generating, so the store reflects the incoming history
    # even if generation fails.
    chat_history_store = history.copy() if history else []

    reply = protected_generate_response(message, history)

    chat_history_store.extend([
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    ])

    return reply
458
+
459
def export_current_conversation():
    """Export the conversation held in ``chat_history_store``.

    Returns:
        A ``gr.update`` that hides the download component when nothing has been
        stored, or points it at a temporary ``.md`` file otherwise.
    """
    # export_conversation() performs the same empty check, Markdown rendering,
    # and temp-file write, so delegate to it with the stored history.
    return export_conversation(chat_history_store)
472
+
473
def export_conversation(history):
    """Write *history* to a temporary Markdown file for download.

    Returns:
        A ``gr.update`` that hides the download component when *history* is
        empty, or sets its value to the path of the written ``.md`` file and
        makes it visible.
    """
    if not history:
        return gr.update(visible=False)

    rendered = export_conversation_to_markdown(history)

    # delete=False keeps the file on disk after the handle closes, since its
    # path is handed to the download component below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as out:
        out.write(rendered)
        export_path = out.name

    return gr.update(value=export_path, visible=True)
486
+
487
# Configuration status display
def get_configuration_status():
    """Generate a configuration status message for display

    Returns:
        str: One Markdown line per setting (API key, model, sampling parameters,
        and the enabled optional features), joined with newlines.
    """
    # Kept local so the message below never depends on a module-level name.
    api_key_var = "OPENROUTER_API_KEY"

    if API_KEY_VALID:
        key_line = "βœ… **API Key:** Configured and valid"
    else:
        # Must be an f-string: without the prefix the user sees the literal
        # text "{api_key_var}" instead of the secret's name.
        key_line = f"❌ **API Key:** Not configured - Set `{api_key_var}` in Space secrets"

    status_parts = [
        key_line,
        f"πŸ€– **Model:** {MODEL}",
        "🌑️ **Temperature:** 0.7",
        "πŸ“ **Max Tokens:** 1500",
    ]

    if GROUNDING_URLS:
        status_parts.append(f"πŸ”— **URL Grounding:** {len(GROUNDING_URLS)} URLs configured")

    if ENABLE_DYNAMIC_URLS:
        status_parts.append("πŸ”„ **Dynamic URLs:** Enabled")

    if ENABLE_VECTOR_RAG:
        status_parts.append("πŸ“š **Document RAG:** Enabled")

    status_parts.append(
        "πŸ” **Access Control:** Enabled" if ACCESS_CODE else "🌐 **Access:** Public"
    )

    return "\n".join(status_parts)
516
+
517
# Create interface with access code protection
# Layout: title and description, a configuration-status accordion, an access-code
# form (visible only when ACCESS_CODE is set), and the chat section (hidden
# until access is granted when ACCESS_CODE is set).
with gr.Blocks(title=SPACE_NAME) as demo:
    gr.Markdown(f"# {SPACE_NAME}")
    gr.Markdown(SPACE_DESCRIPTION)

    # Configuration status (always visible)
    # The accordion starts expanded when the API key failed validation.
    with gr.Accordion("πŸ“Š Configuration Status", open=not API_KEY_VALID):
        gr.Markdown(get_configuration_status())

    # Access code section (shown only if ACCESS_CODE is set)
    with gr.Column(visible=bool(ACCESS_CODE)) as access_section:
        gr.Markdown("### πŸ” Access Required")
        gr.Markdown("Please enter the access code provided by your instructor:")

        access_input = gr.Textbox(
            label="Access Code",
            placeholder="Enter access code...",
            type="password"
        )
        access_btn = gr.Button("Submit", variant="primary")
        access_error = gr.Markdown(visible=False)

    # Main chat interface (hidden until access granted)
    with gr.Column(visible=not bool(ACCESS_CODE)) as chat_section:
        chat_interface = gr.ChatInterface(
            fn=store_and_generate_response,  # Use wrapper function to store history
            title="",  # Title already shown above
            description="",  # Description already shown above
            examples=None,
            type="messages"  # Use modern message format for better compatibility
        )

        # Export functionality
        with gr.Row():
            export_btn = gr.Button("πŸ“₯ Export Conversation", variant="secondary", size="sm")
            export_file = gr.File(label="Download Conversation", visible=False)

        # Connect export functionality
        # Takes no inputs: the handler reads the module-level chat_history_store.
        export_btn.click(
            export_current_conversation,
            outputs=[export_file]
        )

    # Connect access verification
    # Both the button click and pressing Enter in the textbox run the same check.
    # NOTE(review): access_granted is created at module level, before this Blocks
    # context opens - confirm it is usable as an event output here.
    if ACCESS_CODE:
        access_btn.click(
            verify_access_code,
            inputs=[access_input],
            outputs=[access_error, chat_section, access_granted]
        )
        access_input.submit(
            verify_access_code,
            inputs=[access_input],
            outputs=[access_error, chat_section, access_granted]
        )

if __name__ == "__main__":
    demo.launch()
config.json CHANGED
@@ -1,16 +1,15 @@
1
  {
2
- "name": "search-aid",
3
  "description": "",
4
- "system_prompt": "You are an advanced research assistant specializing in academic literature search and analysis. Your expertise includes finding peer-reviewed sources, critically evaluating research methodology, synthesizing insights across multiple papers, and providing properly formatted citations. When responding, ground all claims in specific sources from provided URL contexts, distinguish between direct evidence and analytical interpretation, and highlight any limitations or conflicting findings. Use clear, accessible language that makes complex research understandable, and suggest related areas of inquiry when relevant. Your goal is to be a knowledgeable research partner who helps users navigate academic information with precision and clarity.",
5
- "model": "google/gemini-2.0-flash-001",
6
  "api_key_var": "OPENROUTER_API_KEY",
7
  "temperature": 0.7,
8
  "max_tokens": 1500,
9
- "examples": "[\"Hello! How can you help me?\", \"Tell me something interesting\", \"What can you do?\"]",
10
- "grounding_urls": "[]",
11
  "access_code": "",
12
  "enable_dynamic_urls": true,
13
  "enable_vector_rag": false,
14
- "enable_web_search": true,
15
  "rag_data_json": "None"
16
  }
 
1
  {
2
+ "name": "Britannica Wiki Search",
3
  "description": "",
4
+ "system_prompt": "You are a research aid specializing in academic literature search and analysis. Your expertise spans discovering peer-reviewed sources, assessing research methodologies, synthesizing findings across studies, and delivering properly formatted citations. When responding, anchor claims in specific sources from provided URL contexts, differentiate between direct evidence and interpretive analysis, and note any limitations or contradictory results. Employ clear, accessible language that demystifies complex research, and propose connected research directions when appropriate. Your purpose is to serve as an informed research tool supporting users through initial concept development, exploratory investigation, information collection, and source compilation.",
5
+ "model": "openai/gpt-4o-mini-search-preview",
6
  "api_key_var": "OPENROUTER_API_KEY",
7
  "temperature": 0.7,
8
  "max_tokens": 1500,
9
+ "examples": "[\"Teach me about the history of the beatniks\", \"Find commentary on internet discourse\"]",
10
+ "grounding_urls": "[\"https://www.wikipedia.org/\", \"https://www.britannica.com/\"]",
11
  "access_code": "",
12
  "enable_dynamic_urls": true,
13
  "enable_vector_rag": false,
 
14
  "rag_data_json": "None"
15
  }
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
- gradio>=5.35.0
2
  requests>=2.32.3
3
  beautifulsoup4>=4.12.3
4
- crawl4ai>=0.2.0
5
- aiohttp>=3.8.0
6
- psutil>=6.1.1
 
1
+ gradio>=4.44.1
2
  requests>=2.32.3
3
  beautifulsoup4>=4.12.3
4
+ python-dotenv>=1.0.0