ginipick committed on
Commit
9a35f3e
·
verified ·
1 Parent(s): f11394e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1119 -0
app.py ADDED
@@ -0,0 +1,1119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ──────────────────────────────── Imports ────────────────────────────────
2
+ import os, json, re, logging, requests, markdown, time, io
3
+ from datetime import datetime
4
+
5
+ import streamlit as st
6
+ from openai import OpenAI # OpenAI 라이브러리
7
+
8
+ from gradio_client import Client
9
+ import pandas as pd
10
+ import PyPDF2 # For handling PDF files
11
+
12
# ──────────────────────────────── Environment Variables / Constants ─────────────────────────
# Credentials come from the process environment; an unset variable yields an
# empty string so the functions that need a key can detect it and raise.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
BRAVE_KEY = os.environ.get("SERPHOUSE_API_KEY", "")  # Keep this name

# One Brave Search endpoint per vertical (web, images, videos, news).
BRAVE_ENDPOINT, BRAVE_IMAGE_ENDPOINT, BRAVE_VIDEO_ENDPOINT, BRAVE_NEWS_ENDPOINT = (
    f"https://api.search.brave.com/res/v1/{vertical}/search"
    for vertical in ("web", "images", "videos", "news")
)

# Address handed to gradio_client.Client by generate_image().
IMAGE_API_URL = "http://211.233.58.201:7896"
MAX_TOKENS = 7999

# Search modes and style definitions (in English).
# Keys are the internal identifiers; values are the labels shown in the UI.
SEARCH_MODES = dict(
    comprehensive="Comprehensive answer with multiple sources",
    academic="Academic and research-focused results",
    news="Latest news and current events",
    technical="Technical and specialized information",
    educational="Educational and learning resources",
)

RESPONSE_STYLES = dict(
    professional="Professional and formal tone",
    casual="Friendly and conversational tone",
    simple="Simple and easy to understand",
    detailed="Detailed and thorough explanations",
)

# Example search queries
EXAMPLE_QUERIES = dict(
    example1="What are the latest developments in quantum computing?",
    example2="How does climate change affect biodiversity in tropical rainforests?",
    example3="What are the economic implications of artificial intelligence in the job market?",
)

# ──────────────────────────────── Logging ────────────────────────────────
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
48
+
49
+ # ──────────────────────────────── OpenAI Client ──────────────────────────
50
+
51
+ # Add timeout and retry logic to the OpenAI client
52
@st.cache_resource
def get_openai_client():
    """Create an OpenAI client with timeout and retry settings.

    The function is wrapped in ``st.cache_resource``; the intent appears to
    be that the app reuses a single client object instead of building a new
    one on each call.

    Returns:
        An ``OpenAI`` client configured with the module-level
        ``OPENAI_API_KEY``, a 60 second timeout and up to 3 retries.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is empty.
    """
    if not OPENAI_API_KEY:
        # The message text was mis-encoded in the source; this is the
        # restored Korean ("the OPENAI_API_KEY environment variable is not set").
        raise RuntimeError("⚠️ OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
    return OpenAI(
        api_key=OPENAI_API_KEY,
        timeout=60.0,  # request timeout of 60 seconds
        max_retries=3  # retry up to 3 times
    )
62
+
63
+ # ──────────────────────────────── System Prompt ─────────────────────────
64
def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
    """
    Assemble the system prompt for the Perplexity-like assistant.

    The prompt always starts with the comprehensive base instructions and
    always ends with the additional formatting requirements. In between,
    optional segments are appended in this order:

    - the mode-specific addendum (any mode other than "comprehensive";
      an unknown mode contributes only a newline)
    - the tone/style sentence (only when ``style`` is a known style key)
    - the web-search usage guide (when ``include_search_results`` is true)
    - the uploaded-file usage guide (when ``include_uploaded_files`` is true)
    """

    # Base instructions shared by every mode.
    base_prompt = """
You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.

Your task is to:
1. Thoroughly analyze the user's query
2. Provide a clear, well-structured answer integrating information from multiple sources
3. Include relevant images, videos, and links in your response
4. Format your answer with proper headings, bullet points, and sections
5. Cite sources inline and provide a references section at the end

Important guidelines:
- Organize information logically with clear section headings
- Use bullet points and numbered lists for clarity
- Include specific, factual information whenever possible
- Provide balanced perspectives on controversial topics
- Display relevant statistics, data, or quotes when appropriate
- Format your response using markdown for readability
"""

    # Extra instructions layered on top of the base prompt per mode.
    mode_addenda = {
        "academic": """
Your focus is on providing academic and research-focused responses:
- Prioritize peer-reviewed research and academic sources
- Include citations in a formal academic format
- Discuss methodologies and research limitations where relevant
- Present different scholarly perspectives on the topic
- Use precise, technical language appropriate for an academic audience
""",
        "news": """
Your focus is on providing the latest news and current events:
- Prioritize recent news articles and current information
- Include publication dates for all news sources
- Present multiple perspectives from different news outlets
- Distinguish between facts and opinions/editorial content
- Update information with the most recent developments
""",
        "technical": """
Your focus is on providing technical and specialized information:
- Use precise technical terminology appropriate to the field
- Include code snippets, formulas, or technical diagrams where relevant
- Break down complex concepts into step-by-step explanations
- Reference technical documentation, standards, and best practices
- Consider different technical approaches or methodologies
""",
        "educational": """
Your focus is on providing educational and learning resources:
- Structure information in a learning-friendly progression
- Include examples, analogies, and visual explanations
- Highlight key concepts and definitions
- Suggest further learning resources at different difficulty levels
- Present information that's accessible to learners at various levels
"""
    }

    # One-sentence tone description per response style.
    tone_by_style = {
        "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
        "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
        "simple": "Use straightforward language and avoid jargon. Keep sentences and paragraphs short. Explain concepts as if to someone with no background in the subject.",
        "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
    }

    # How the model should use web search results.
    search_usage = """
Guidelines for Using Search Results:
- Include source links directly in your response using markdown: [Source Name](URL)
- For each major claim or piece of information, indicate its source
- If sources conflict, explain the different perspectives and their reliability
- Include 3-5 relevant images by writing: ![Image description](image_url)
- Include 1-2 relevant video links when appropriate by writing: [Video: Title](video_url)
- Format search information into a cohesive, well-structured response
- Include a "References" section at the end listing all major sources with links
"""

    # How the model should use user-uploaded files.
    upload_usage = """
Guidelines for Using Uploaded Files:
- Treat the uploaded files as primary sources for your response
- Extract and highlight key information from files that directly addresses the query
- Quote relevant passages and cite the specific file
- For numerical data in CSV files, consider creating summary statements
- For PDF content, reference specific sections or pages
- Integrate file information seamlessly with web search results
- When information conflicts, prioritize file content over general web results
"""

    # Formatting rules that close every prompt.
    formatting_rules = """
\n\nAdditional Formatting Requirements:
- Use markdown headings (## and ###) to organize your response
- Use bold text (**text**) for emphasis on important points
- Include a "Related Questions" section at the end with 3-5 follow-up questions
- Format your response with proper spacing and paragraph breaks
- Make all links clickable by using proper markdown format: [text](url)
"""

    segments = [base_prompt]

    if mode != "comprehensive":
        # Unknown modes fall back to an empty addendum (newline only).
        segments.append("\n" + mode_addenda.get(mode, ""))

    tone = tone_by_style.get(style)
    if tone is not None:
        segments.append(f"\n\nTone and Style: {tone}")

    if include_search_results:
        segments.append(f"\n\n{search_usage}")

    if include_uploaded_files:
        segments.append(f"\n\n{upload_usage}")

    segments.append(formatting_rules)

    return "".join(segments)
188
+
189
+ # ──────────────────────────────── Brave Search API ────────────────────────
190
@st.cache_data(ttl=3600)
def brave_search(query: str, count: int = 20):
    """
    Call the Brave Web Search API → list[dict]
    Returns fields: index, title, link, snippet, displayed_link

    The request is attempted up to 3 times, sleeping 2 seconds between
    failed attempts. A successful response that simply contains no results
    returns an empty list immediately; it is not retried, because repeating
    the same query would only add latency.

    NOTE(review): the function is wrapped in ``st.cache_data(ttl=3600)``, so
    an empty list is presumably cached for the TTL too - confirm that this
    is acceptable.

    Args:
        query: search terms sent as the ``q`` parameter.
        count: maximum number of results requested and returned.

    Returns:
        A list of result dicts, or ``[]`` when nothing was found or every
        attempt failed.

    Raises:
        RuntimeError: if the Brave API key (``BRAVE_KEY``) is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    params = {"q": query, "count": str(count)}

    for attempt in range(3):
        try:
            r = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
            r.raise_for_status()
            data = r.json()

            logging.info(f"Brave search result data structure: {list(data.keys())}")

            # Results normally live under data["web"]["results"]; fall back to
            # a top-level "results" list. `or {}` guards against "web": null.
            raw = (data.get("web") or {}).get("results") or data.get("results", [])
            if not raw:
                # A valid but empty answer: nothing to gain from retrying.
                logging.warning(f"No Brave search results found. Response: {data}")
                return []

            arts = []
            for i, res in enumerate(raw[:count], 1):
                url = res.get("url", res.get("link", ""))
                # Strip the scheme and an optional "www." to get a display host.
                host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
                arts.append({
                    "index": i,
                    "title": res.get("title", "No title"),
                    "link": url,
                    "snippet": res.get("description", res.get("text", "No snippet")),
                    "displayed_link": host
                })

            logging.info(f"Brave search success: {len(arts)} results")
            return arts

        except Exception as e:
            # Best-effort: network, HTTP and parsing errors are logged and retried.
            logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:
                time.sleep(2)

    return []
240
+
241
@st.cache_data(ttl=3600)
def brave_image_search(query: str, count: int = 10):
    """
    Query the Brave Image Search API and normalise the hits → list[dict]
    Each dict has: index, title, image_url, source_url, width, height

    Up to 3 attempts are made with a 2 second pause after a failed attempt;
    when all attempts fail an empty list is returned.

    Raises RuntimeError when the Brave API key is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    request_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    request_params = {
        "q": query,
        "count": str(count),
        "search_lang": "en",
        "country": "us",
        "spellcheck": "1"
    }

    for attempt_no in range(1, 4):
        try:
            reply = requests.get(BRAVE_IMAGE_ENDPOINT, headers=request_headers,
                                 params=request_params, timeout=15)
            reply.raise_for_status()
            payload = reply.json()

            hits = []
            for position, item in enumerate(payload.get("results", [])[:count], 1):
                # NOTE(review): assumes each hit nests url/width/height under
                # an "image" key - confirm against the Brave response schema.
                image_meta = item.get("image", {})
                hits.append({
                    "index": position,
                    "title": item.get("title", "Image"),
                    "image_url": image_meta.get("url", ""),
                    "source_url": item.get("source", ""),
                    "width": image_meta.get("width", 0),
                    "height": image_meta.get("height", 0)
                })

            logging.info(f"Brave image search success: {len(hits)} results")
            return hits

        except Exception as e:
            logging.error(f"Brave image search failure (attempt {attempt_no}/3): {e}")
            if attempt_no < 3:
                time.sleep(2)

    return []
289
+
290
@st.cache_data(ttl=3600)
def brave_video_search(query: str, count: int = 5):
    """
    Query the Brave Video Search API and normalise the hits → list[dict]
    Each dict has: index, title, video_url, thumbnail_url, source

    Up to 3 attempts are made with a 2 second pause after a failed attempt;
    when all attempts fail an empty list is returned.

    Raises RuntimeError when the Brave API key is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    request_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    request_params = {"q": query, "count": str(count)}

    for attempt_no in range(1, 4):
        try:
            reply = requests.get(BRAVE_VIDEO_ENDPOINT, headers=request_headers,
                                 params=request_params, timeout=15)
            reply.raise_for_status()
            payload = reply.json()

            hits = [
                {
                    "index": position,
                    "title": item.get("title", "Video"),
                    "video_url": item.get("url", ""),
                    "thumbnail_url": item.get("thumbnail", {}).get("src", ""),
                    "source": item.get("provider", {}).get("name", "Unknown source")
                }
                for position, item in enumerate(payload.get("results", [])[:count], 1)
            ]

            logging.info(f"Brave video search success: {len(hits)} results")
            return hits

        except Exception as e:
            logging.error(f"Brave video search failure (attempt {attempt_no}/3): {e}")
            if attempt_no < 3:
                time.sleep(2)

    return []
334
+
335
@st.cache_data(ttl=3600)
def brave_news_search(query: str, count: int = 5):
    """
    Query the Brave News Search API and normalise the hits → list[dict]
    Each dict has: index, title, url, description, source, date

    The ``date`` field is filled from the hit's ``age`` value. Up to 3
    attempts are made with a 2 second pause after a failed attempt; when all
    attempts fail an empty list is returned.

    Raises RuntimeError when the Brave API key is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    request_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    request_params = {"q": query, "count": str(count)}

    for attempt_no in range(1, 4):
        try:
            reply = requests.get(BRAVE_NEWS_ENDPOINT, headers=request_headers,
                                 params=request_params, timeout=15)
            reply.raise_for_status()
            payload = reply.json()

            hits = [
                {
                    "index": position,
                    "title": item.get("title", "News article"),
                    "url": item.get("url", ""),
                    "description": item.get("description", ""),
                    "source": item.get("source", "Unknown source"),
                    "date": item.get("age", "Unknown date")
                }
                for position, item in enumerate(payload.get("results", [])[:count], 1)
            ]

            logging.info(f"Brave news search success: {len(hits)} results")
            return hits

        except Exception as e:
            logging.error(f"Brave news search failure (attempt {attempt_no}/3): {e}")
            if attempt_no < 3:
                time.sleep(2)

    return []
380
+
381
def mock_results(query: str) -> str:
    """Build the fallback text used in place of search results.

    The text is stamped with the current local time and asks for an answer
    based on pre-existing knowledge about ``query``.
    """
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    segments = [
        f"# Fallback Search Content (Generated: {generated_at})\n\n",
        f"The search API request failed. Please generate a response based on any pre-existing knowledge about '{query}'.\n\n",
        "You may consider the following points:\n\n",
        f"- Basic concepts and importance of {query}\n",
        "- Commonly known related statistics or trends\n",
        "- Typical expert opinions on this subject\n",
        "- Questions that readers might have\n\n",
        "Note: This is fallback guidance, not real-time data.\n\n",
    ]
    return "".join(segments)
392
+
393
def do_web_search(query: str) -> str:
    """Perform web, image, video and news searches and format the results.

    Up to 10 web results are rendered, followed by the image, video and news
    sections when those searches returned anything. If the web search yields
    nothing, or any step raises, the fallback text from ``mock_results`` is
    returned instead.

    Args:
        query: the user's search query.

    Returns:
        A markdown string with the formatted search results or the fallback
        guidance.
    """
    try:
        # Web search
        arts = brave_search(query, 20)
        if not arts:
            logging.warning("No search results, using fallback content")
            return mock_results(query)

        # Image, video and news searches
        images = brave_image_search(query, 5)
        videos = brave_video_search(query, 2)
        news = brave_news_search(query, 3)

        # Collect the pieces and join once at the end.
        parts = [
            "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources. Include relevant images, videos, and links.\n\n",
            "## Web Results\n\n",
        ]

        for a in arts[:10]:  # Limit to top 10 results
            parts.append(f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n")
            parts.append(f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n")

        # Add image results if available
        if images:
            parts.append("## Image Results\n\n")
            for img in images:
                if img.get('image_url'):
                    # The key is normally present but may be an empty string,
                    # so `or` (not a .get default) is what triggers the
                    # placeholder label and link.
                    source_url = img.get('source_url')
                    parts.append(f"![{img['title']}]({img['image_url']})\n\n")
                    parts.append(f"**Source**: [{source_url or 'Image source'}]({source_url or '#'})\n\n")

        # Add video results if available
        if videos:
            parts.append("## Video Results\n\n")
            for vid in videos:
                parts.append(f"### {vid['title']}\n\n")
                if vid.get('thumbnail_url'):
                    parts.append(f"![Thumbnail]({vid['thumbnail_url']})\n\n")
                parts.append(f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n")

        # Add news results if available
        if news:
            parts.append("## News Results\n\n")
            for n in news:
                parts.append(f"### {n['title']}\n\n{n['description']}\n\n")
                parts.append(f"**Source**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n")

        return "".join(parts)

    except Exception as e:
        # Best-effort: any failure degrades to the fallback guidance.
        logging.error(f"Web search process failed: {str(e)}")
        return mock_results(query)
449
+
450
+ # ──────────────────────────────── File Upload Handling ─────────────────────
451
def process_text_file(file):
    """Render an uploaded text file as a markdown section.

    The file is read in full and rewound to position 0. Bytes are decoded as
    UTF-8 with undecodable bytes dropped; text longer than 10000 characters
    is cut to its first 9700 characters plus a truncation marker. Any
    failure is logged and reported in the returned string.
    """
    try:
        raw_bytes = file.read()
        file.seek(0)  # leave the stream readable for later consumers

        decoded = raw_bytes.decode('utf-8', errors='ignore')
        body = decoded if len(decoded) <= 10000 else decoded[:9700] + "...(truncated)..."

        return f"## Text File: {file.name}\n\n" + body
    except Exception as e:
        logging.error(f"Error processing text file: {str(e)}")
        return f"Error processing text file: {str(e)}"
467
+
468
def process_csv_file(file):
    """Summarise an uploaded CSV file as a markdown section.

    The output lists the row count, column count and column names, shows the
    first 10 rows, and adds ``describe()`` statistics (rounded to 2 decimals)
    when numeric columns exist. If a markdown table cannot be produced, the
    preview falls back to the plain-text frame and the statistics fall back
    to a notice. Any other failure is logged and reported in the returned
    string.
    """
    try:
        raw_bytes = file.read()
        file.seek(0)  # leave the stream readable for later consumers

        frame = pd.read_csv(io.BytesIO(raw_bytes))
        sections = [
            f"## CSV File: {file.name}\n\n",
            f"- Rows: {len(frame)}\n",
            f"- Columns: {len(frame.columns)}\n",
            f"- Column Names: {', '.join(frame.columns.tolist())}\n\n",
            "### Data Preview\n\n",
        ]

        head_rows = frame.head(10)
        try:
            preview_table = head_rows.to_markdown(index=False)
            if preview_table:
                sections.append(preview_table + "\n\n")
            else:
                sections.append("Unable to display CSV data.\n\n")
        except Exception as e:
            logging.error(f"Markdown table conversion error: {e}")
            sections.append("Displaying data as text:\n\n")
            sections.append(str(head_rows) + "\n\n")

        numeric_columns = frame.select_dtypes(include=['number']).columns
        if len(numeric_columns) > 0:
            sections.append("### Basic Statistical Information\n\n")
            try:
                stats_table = frame[numeric_columns].describe().round(2).to_markdown()
                if stats_table:
                    sections.append(stats_table + "\n\n")
                else:
                    sections.append("Unable to display statistical information.\n\n")
            except Exception as e:
                logging.error(f"Statistical info conversion error: {e}")
                sections.append("Unable to generate statistical information.\n\n")

        return "".join(sections)
    except Exception as e:
        logging.error(f"CSV file processing error: {str(e)}")
        return f"Error processing CSV file: {str(e)}"
511
+
512
def process_pdf_file(file):
    """Extract text from an uploaded PDF and render it as a markdown section.

    At most the first 5 pages are read and each page contributes at most
    1500 characters. Extraction stops early once the accumulated text
    exceeds 8000 characters. A page that fails to extract is logged and
    noted in the output without aborting the remaining pages; a failure to
    open the PDF yields an error section instead.
    """
    try:
        raw_bytes = file.read()
        file.seek(0)  # leave the stream readable for later consumers

        reader = PyPDF2.PdfReader(io.BytesIO(raw_bytes), strict=False)
        total_pages = len(reader.pages)
        shown_pages = min(5, total_pages)

        header = f"## PDF File: {file.name}\n\n" + f"- Total pages: {total_pages}\n\n"

        extracted = ""
        for page_no in range(1, shown_pages + 1):
            try:
                text = reader.pages[page_no - 1].extract_text()

                section = f"### Page {page_no}\n\n"
                if not text or not text.strip():
                    section += "(No text could be extracted from this page)\n\n"
                elif len(text) > 1500:
                    # Limit to 1500 characters per page
                    section += text[:1500] + "...(truncated)...\n\n"
                else:
                    section += text + "\n\n"

                extracted += section

                # Stop once the combined text gets too long
                if len(extracted) > 8000:
                    extracted += "...(truncating remaining pages; PDF is too large)...\n\n"
                    break

            except Exception as page_err:
                logging.error(f"Error processing PDF page {page_no}: {str(page_err)}")
                extracted += f"### Page {page_no}\n\n(Error extracting content: {str(page_err)})\n\n"

        if total_pages > shown_pages:
            extracted += f"\nNote: Only the first {shown_pages} pages are shown out of {total_pages} total.\n\n"

        return header + "### PDF Content\n\n" + extracted

    except Exception as e:
        logging.error(f"PDF file processing error: {str(e)}")
        return f"## PDF File: {file.name}\n\nError occurred: {str(e)}\n\nThis PDF file cannot be processed."
566
+
567
def process_uploaded_files(files):
    """Merge the rendered contents of every uploaded file into one string.

    Returns None when ``files`` is empty or None. Otherwise each file is
    dispatched on its lower-cased extension (txt, csv, pdf); other
    extensions get an "Unsupported File" entry. Every entry is followed by a
    horizontal-rule divider. A failure on one file is logged and recorded in
    the output without stopping the remaining files.
    """
    if not files:
        return None

    divider = "\n\n---\n\n"
    pieces = [
        "# Uploaded File Contents\n\n",
        "Below is the content from the files provided by the user. Integrate this data as a main source of information for your response.\n\n",
    ]

    for upload in files:
        try:
            suffix = upload.name.rsplit('.', 1)[-1].lower()
            if suffix == 'txt':
                entry = process_text_file(upload) + divider
            elif suffix == 'csv':
                entry = process_csv_file(upload) + divider
            elif suffix == 'pdf':
                entry = process_pdf_file(upload) + divider
            else:
                entry = f"### Unsupported File: {upload.name}" + divider
        except Exception as e:
            logging.error(f"File processing error {upload.name}: {e}")
            entry = f"### File processing error: {upload.name}\n\nError: {e}" + divider
        pieces.append(entry)

    return "".join(pieces)
591
+
592
+ # ──────────────────────────────── Image & Utility ─────────────────────────
593
def extract_image_urls_from_search(image_results):
    """Keep only image results whose ``image_url`` starts with "http".

    Returns a list of dicts with keys ``url``, ``title`` (default "Image")
    and ``source`` (default ""); an empty or missing input gives ``[]``.
    """
    candidates = ((entry, entry.get('image_url')) for entry in (image_results or []))
    return [
        {
            'url': link,
            'title': entry.get('title', 'Image'),
            'source': entry.get('source_url', '')
        }
        for entry, link in candidates
        if link and link.startswith('http')
    ]
609
+
610
def extract_video_data_from_search(video_results):
    """Keep only video results whose ``video_url`` starts with "http".

    Returns a list of dicts with keys ``url``, ``title`` (default "Video"),
    ``thumbnail`` (default "") and ``source`` (default "Video source"); an
    empty or missing input gives ``[]``.
    """
    candidates = ((entry, entry.get('video_url')) for entry in (video_results or []))
    return [
        {
            'url': link,
            'title': entry.get('title', 'Video'),
            'thumbnail': entry.get('thumbnail_url', ''),
            'source': entry.get('source', 'Video source')
        }
        for entry, link in candidates
        if link and link.startswith('http')
    ]
627
+
628
def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
    """Request an image from the remote generation endpoint.

    Returns a ``(image, message)`` pair. With an empty prompt the pair is
    ``(None, "Insufficient prompt")`` and no request is made. On success the
    pair is the first element of the endpoint's reply and a "Seed: ..."
    message built from the second; on any error it is ``(None, <error text>)``.
    """
    if not prompt:
        return None, "Insufficient prompt"

    request_args = dict(
        prompt=prompt,
        width=w,
        height=h,
        guidance=g,
        inference_steps=steps,
        seed=seed,
        do_img2img=False,
        init_image=None,
        image2image_strength=0.8,
        resize_img=True,
        api_name="/generate_image",
    )
    try:
        reply = Client(IMAGE_API_URL).predict(**request_args)
        return reply[0], f"Seed: {reply[1]}"
    except Exception as e:
        logging.error(e)
        return None, str(e)
644
+
645
def extract_image_prompt(response_text: str, topic: str):
    """
    Generate a single-line English image prompt from the response content.

    The chat model is asked to condense ``response_text`` (with ``topic`` as
    context) into one prompt line. Any failure - including failure to obtain
    the OpenAI client, which previously escaped this function - is logged
    and a generic prompt built from ``topic`` is returned instead.

    Args:
        response_text: the assistant response to derive the prompt from.
        topic: the subject of the response; used in the request and in the
            fallback prompt.

    Returns:
        The stripped prompt text from the model, or the fallback prompt.
    """
    try:
        # Acquired inside the try so a missing API key also uses the fallback.
        client = get_openai_client()

        response = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."},
                {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"}
            ],
            temperature=1,
            max_tokens=80,
            top_p=1
        )

        return response.choices[0].message.content.strip()
    except Exception as e:
        logging.error(f"OpenAI image prompt generation error: {e}")
        return f"A professional photo related to {topic}, high quality"
667
+
668
def md_to_html(md: str, title="Perplexity-like Response"):
    """Convert Markdown to a minimal standalone HTML document.

    Args:
        md: Markdown source, rendered with ``markdown.markdown``.
        title: text for the ``<title>`` element. It is HTML-escaped because
            callers may pass text taken from a model response, which can
            contain ``<``, ``&`` or quotes.

    Returns:
        The complete HTML document as a single string.
    """
    import html  # local import so this block does not depend on file-level imports

    safe_title = html.escape(str(title))
    body = markdown.markdown(md)
    return f"<!DOCTYPE html><html><head><title>{safe_title}</title><meta charset='utf-8'></head><body>{body}</body></html>"
671
+
672
def keywords(text: str, top=5):
    """Simple keyword extraction.

    Removes every character that is not a Hangul syllable, an ASCII letter,
    an ASCII digit or whitespace, then returns the first ``top``
    whitespace-separated tokens joined by single spaces.

    Args:
        text: input text.
        top: maximum number of tokens to keep.

    Returns:
        The space-joined tokens; an empty string when nothing remains.
    """
    # \uAC00-\uD7A3 is the Hangul syllable range. It is written with escapes
    # because the literal range start was corrupted by a mis-encoding, which
    # widened the class so that symbols such as arrows were kept.
    cleaned = re.sub(r"[^\uAC00-\uD7A3a-zA-Z0-9\s]", "", text)
    return " ".join(cleaned.split()[:top])
676
+
677
+ # ──────────────────────────────── Streamlit UI ────────────────────────────
678
+ def perplexity_app():
679
+ st.title("Perplexity-like AI Assistant")
680
+
681
+ # Set default session state
682
+ if "ai_model" not in st.session_state:
683
+ st.session_state.ai_model = "gpt-4.1-mini" # κ³ μ • λͺ¨λΈ μ„€μ •
684
+ if "messages" not in st.session_state:
685
+ st.session_state.messages = []
686
+ if "auto_save" not in st.session_state:
687
+ st.session_state.auto_save = True
688
+ if "generate_image" not in st.session_state:
689
+ st.session_state.generate_image = False
690
+ if "web_search_enabled" not in st.session_state:
691
+ st.session_state.web_search_enabled = True
692
+ if "search_mode" not in st.session_state:
693
+ st.session_state.search_mode = "comprehensive"
694
+ if "response_style" not in st.session_state:
695
+ st.session_state.response_style = "professional"
696
+
697
+ # Sidebar UI
698
+ sb = st.sidebar
699
+ sb.title("Search Settings")
700
+
701
+ sb.subheader("Response Configuration")
702
+ sb.selectbox(
703
+ "Search Mode",
704
+ options=list(SEARCH_MODES.keys()),
705
+ format_func=lambda x: SEARCH_MODES[x],
706
+ key="search_mode"
707
+ )
708
+
709
+ sb.selectbox(
710
+ "Response Style",
711
+ options=list(RESPONSE_STYLES.keys()),
712
+ format_func=lambda x: RESPONSE_STYLES[x],
713
+ key="response_style"
714
+ )
715
+
716
+ # Example queries
717
+ sb.subheader("Example Queries")
718
+ c1, c2, c3 = sb.columns(3)
719
+ if c1.button("Quantum Computing", key="ex1"):
720
+ process_example(EXAMPLE_QUERIES["example1"])
721
+ if c2.button("Climate Change", key="ex2"):
722
+ process_example(EXAMPLE_QUERIES["example2"])
723
+ if c3.button("AI Economics", key="ex3"):
724
+ process_example(EXAMPLE_QUERIES["example3"])
725
+
726
+ sb.subheader("Other Settings")
727
+ sb.toggle("Auto Save", key="auto_save")
728
+ sb.toggle("Auto Image Generation", key="generate_image")
729
+
730
+ web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
731
+ st.session_state.web_search_enabled = web_search_enabled
732
+
733
+ if web_search_enabled:
734
+ st.sidebar.info("βœ… Web search results will be integrated into the response.")
735
+
736
+ # Download the latest response
737
+ latest_response = next(
738
+ (m["content"] for m in reversed(st.session_state.messages)
739
+ if m["role"] == "assistant" and m["content"].strip()),
740
+ None
741
+ )
742
+ if latest_response:
743
+ # Extract a title from the response - first heading or first line
744
+ title_match = re.search(r"# (.*?)(\n|$)", latest_response)
745
+ if title_match:
746
+ title = title_match.group(1).strip()
747
+ else:
748
+ first_line = latest_response.split('\n', 1)[0].strip()
749
+ title = first_line[:40] + "..." if len(first_line) > 40 else first_line
750
+
751
+ sb.subheader("Download Latest Response")
752
+ d1, d2 = sb.columns(2)
753
+ d1.download_button("Download as Markdown", latest_response,
754
+ file_name=f"{title}.md", mime="text/markdown")
755
+ d2.download_button("Download as HTML", md_to_html(latest_response, title),
756
+ file_name=f"{title}.html", mime="text/html")
757
+
758
+ # JSON conversation record upload
759
+ up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
760
+ if up:
761
+ try:
762
+ st.session_state.messages = json.load(up)
763
+ sb.success("Conversation history loaded successfully")
764
+ except Exception as e:
765
+ sb.error(f"Failed to load: {e}")
766
+
767
+ # JSON conversation record download
768
+ if sb.button("Download Conversation as JSON"):
769
+ sb.download_button(
770
+ "Save",
771
+ data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
772
+ file_name="conversation_history.json",
773
+ mime="application/json"
774
+ )
775
+
776
+ # File Upload
777
+ st.subheader("Upload Files")
778
+ uploaded_files = st.file_uploader(
779
+ "Upload files to be used as reference (txt, csv, pdf)",
780
+ type=["txt", "csv", "pdf"],
781
+ accept_multiple_files=True,
782
+ key="file_uploader"
783
+ )
784
+
785
+ if uploaded_files:
786
+ file_count = len(uploaded_files)
787
+ st.success(f"{file_count} files uploaded. They will be used as sources for your query.")
788
+
789
+ with st.expander("Preview Uploaded Files", expanded=False):
790
+ for idx, file in enumerate(uploaded_files):
791
+ st.write(f"**File Name:** {file.name}")
792
+ ext = file.name.split('.')[-1].lower()
793
+
794
+ if ext == 'txt':
795
+ preview = file.read(1000).decode('utf-8', errors='ignore')
796
+ file.seek(0)
797
+ st.text_area(
798
+ f"Preview of {file.name}",
799
+ preview + ("..." if len(preview) >= 1000 else ""),
800
+ height=150
801
+ )
802
+ elif ext == 'csv':
803
+ try:
804
+ df = pd.read_csv(file)
805
+ file.seek(0)
806
+ st.write("CSV Preview (up to 5 rows)")
807
+ st.dataframe(df.head(5))
808
+ except Exception as e:
809
+ st.error(f"CSV preview failed: {e}")
810
+ elif ext == 'pdf':
811
+ try:
812
+ file_bytes = file.read()
813
+ file.seek(0)
814
+
815
+ pdf_file = io.BytesIO(file_bytes)
816
+ reader = PyPDF2.PdfReader(pdf_file, strict=False)
817
+
818
+ pc = len(reader.pages)
819
+ st.write(f"PDF File: {pc} pages")
820
+
821
+ if pc > 0:
822
+ try:
823
+ page_text = reader.pages[0].extract_text()
824
+ preview = page_text[:500] if page_text else "(No text extracted)"
825
+ st.text_area("Preview of the first page", preview + "...", height=150)
826
+ except:
827
+ st.warning("Failed to extract text from the first page")
828
+ except Exception as e:
829
+ st.error(f"PDF preview failed: {e}")
830
+
831
+ if idx < file_count - 1:
832
+ st.divider()
833
+
834
+ # Display existing messages
835
+ for m in st.session_state.messages:
836
+ with st.chat_message(m["role"]):
837
+ # Process markdown to allow clickable links and properly rendered content
838
+ st.markdown(m["content"], unsafe_allow_html=True)
839
+
840
+ # Display images if present
841
+ if "images" in m and m["images"]:
842
+ st.subheader("Related Images")
843
+ cols = st.columns(min(3, len(m["images"])))
844
+ for i, img_data in enumerate(m["images"]):
845
+ col_idx = i % len(cols)
846
+ with cols[col_idx]:
847
+ try:
848
+ img_url = img_data.get('url', '')
849
+ caption = img_data.get('title', 'Related image')
850
+ if img_url:
851
+ st.image(img_url, caption=caption, use_column_width=True)
852
+ if img_data.get('source'):
853
+ st.markdown(f"[Source]({img_data['source']})")
854
+ except Exception as img_err:
855
+ st.warning(f"Could not display image: {img_err}")
856
+
857
+ # Display videos if present
858
+ if "videos" in m and m["videos"]:
859
+ st.subheader("Related Videos")
860
+ for video in m["videos"]:
861
+ video_title = video.get('title', 'Related video')
862
+ video_url = video.get('url', '')
863
+ thumbnail = video.get('thumbnail', '')
864
+
865
+ # Display video information with thumbnail if available
866
+ if thumbnail:
867
+ col1, col2 = st.columns([1, 3])
868
+ with col1:
869
+ try:
870
+ st.image(thumbnail, width=120)
871
+ except:
872
+ st.write("🎬")
873
+ with col2:
874
+ st.markdown(f"**[{video_title}]({video_url})**")
875
+ st.write(f"Source: {video.get('source', 'Unknown')}")
876
+ else:
877
+ st.markdown(f"🎬 **[{video_title}]({video_url})**")
878
+ st.write(f"Source: {video.get('source', 'Unknown')}")
879
+
880
+ # User input
881
+ query = st.chat_input("Enter your query or question here.")
882
+ if query:
883
+ process_input(query, uploaded_files)
884
+
885
+ # Add the footer badge (links) at the bottom of the sidebar
886
+ sb.markdown("---")
887
+ sb.markdown("Created by [https://ginigen.com](https://ginigen.com) | [YouTube Channel](https://www.youtube.com/@ginipickaistudio)")
888
+
889
def process_example(topic):
    """Send a predefined example topic through the regular query pipeline.

    Example queries carry no attachments, so an empty file list is passed.
    """
    process_input(query=topic, uploaded_files=[])
892
+
893
def _compose_user_prompt(query, search_content, file_content, images, videos):
    """Build the user-role message: the query plus any search/file/media context."""
    parts = [query]

    # Append web search results when present.
    if search_content:
        parts.append("\n\n" + search_content)

    # Append extracted file contents when present.
    if file_content:
        parts.append("\n\n" + file_content)

    # List at most five images so the model can reference them by URL.
    if images:
        parts.append("\n\n# Available Images\n")
        for number, img in enumerate(images[:5], start=1):
            parts.append(f"\n{number}. ![{img['title']}]({img['url']})\n")
            if img['source']:
                parts.append(f" Source: {img['source']}\n")

    # List at most two videos.
    if videos:
        parts.append("\n\n# Available Videos\n")
        for number, vid in enumerate(videos[:2], start=1):
            parts.append(f"\n{number}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n")

    return "".join(parts)


def _render_related_images(images):
    """Render the image results in up to three columns; no-op when there are none."""
    if not images:
        return

    st.subheader("Related Images")
    columns = st.columns(min(3, len(images)))
    for position, img_data in enumerate(images):
        # Wrap around so images beyond the third start a new visual row.
        with columns[position % len(columns)]:
            try:
                st.image(img_data['url'], caption=img_data['title'], use_column_width=True)
                if img_data['source']:
                    st.markdown(f"[Source]({img_data['source']})")
            except Exception as img_err:
                # One broken image must not abort the whole answer.
                st.warning(f"Could not load image: {str(img_err)}")


def _render_related_videos(videos):
    """Render each video result as a titled link, with its thumbnail when available."""
    if not videos:
        return

    st.subheader("Related Videos")
    for video in videos:
        video_title = video.get('title', 'Related video')
        video_url = video.get('url', '')
        thumbnail = video.get('thumbnail', '')
        source_line = f"Source: {video.get('source', 'Unknown')}"

        if thumbnail:
            col1, col2 = st.columns([1, 3])
            with col1:
                try:
                    st.image(thumbnail, width=120)
                except Exception:
                    # Fall back to an icon; was a bare `except:` that also
                    # swallowed KeyboardInterrupt/SystemExit.
                    st.write("🎬")
            with col2:
                st.markdown(f"**[{video_title}]({video_url})**")
                st.write(source_line)
        else:
            st.markdown(f"🎬 **[{video_title}]({video_url})**")
            st.write(source_line)


def process_input(query: str, uploaded_files):
    """Answer one user query and render the conversation turn in the chat UI.

    Steps, in order: record and echo the user message, optionally run the web
    and media searches, optionally process uploaded files, stream a chat
    completion from the fixed model "gpt-4.1-mini", render related media,
    store the assistant turn in ``st.session_state.messages``, optionally
    generate a custom image, offer download buttons, and optionally auto-save
    the conversation to a timestamped JSON file.

    Args:
        query: The text the user submitted.
        uploaded_files: Files to use as reference material. Callers in this
            file pass the result of ``st.file_uploader`` or an empty list.

    Returns:
        None. All results are rendered via Streamlit and written to
        ``st.session_state.messages``.

    Any exception raised inside the main pipeline is caught: an error banner
    is shown and an assistant message containing the error text is appended
    to the history.
    """
    # Record the user's turn unless an identical user message is already stored.
    # NOTE(review): this also skips recording a deliberately repeated question;
    # confirm that is the intended behaviour.
    if not any(m["role"] == "user" and m["content"] == query
               for m in st.session_state.messages):
        st.session_state.messages.append({"role": "user", "content": query})

    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        placeholder = st.empty()          # slot for the error banner
        message_placeholder = st.empty()  # slot for the streamed answer
        full_response = ""
        status = None  # lets the error handler know whether the widget exists

        use_web_search = st.session_state.web_search_enabled
        has_uploaded_files = bool(uploaded_files)

        try:
            # Status component that reports the current pipeline step.
            status = st.status("Preparing to answer your query...")
            status.update(label="Initializing client...")

            client = get_openai_client()

            # Web search
            search_content = None
            image_results = []
            video_results = []

            if use_web_search:
                status.update(label="Performing web search...")
                with st.spinner("Searching the web..."):
                    search_content = do_web_search(keywords(query, top=5))

                # Media searches are best-effort: a failure is logged and the
                # answer is produced without images/videos.
                try:
                    status.update(label="Finding images and videos...")
                    image_results = brave_image_search(query, 5)
                    video_results = brave_video_search(query, 2)
                    # NOTE(review): the news results are fetched but never
                    # used below; the call is kept to preserve behaviour.
                    brave_news_search(query, 3)
                except Exception as search_err:
                    logging.error(f"Media search error: {search_err}")

            # Process uploaded files -> content
            file_content = None
            if has_uploaded_files:
                status.update(label="Processing uploaded files...")
                with st.spinner("Analyzing files..."):
                    file_content = process_uploaded_files(uploaded_files)

            # Extract usable image and video data
            valid_images = extract_image_urls_from_search(image_results)
            valid_videos = extract_video_data_from_search(video_results)

            # Build system prompt
            status.update(label="Preparing comprehensive answer...")
            sys_prompt = get_system_prompt(
                mode=st.session_state.search_mode,
                style=st.session_state.response_style,
                include_search_results=use_web_search,
                include_uploaded_files=has_uploaded_files
            )

            # Prepare the OpenAI API call.
            status.update(label="Generating response...")

            api_messages = [
                {"role": "system", "content": sys_prompt},
                {
                    "role": "user",
                    "content": _compose_user_prompt(
                        query, search_content, file_content, valid_images, valid_videos
                    ),
                },
            ]

            try:
                # Streaming call against the fixed model.
                stream = client.chat.completions.create(
                    model="gpt-4.1-mini",
                    messages=api_messages,
                    temperature=1,
                    max_tokens=MAX_TOKENS,
                    top_p=1,
                    stream=True
                )

                # Re-render the partial answer with a trailing cursor as each
                # delta arrives.
                # NOTE(review): model output is rendered with
                # unsafe_allow_html=True and the prompt may contain web-search
                # text; confirm this is acceptable.
                for chunk in stream:
                    if chunk.choices and chunk.choices[0].delta.content is not None:
                        full_response += chunk.choices[0].delta.content
                        message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)

                # Final render without the cursor.
                message_placeholder.markdown(full_response, unsafe_allow_html=True)

                _render_related_images(valid_images)
                _render_related_videos(valid_videos)

                status.update(label="Response completed!", state="complete")

                # Save the response with images and videos in the session state
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": full_response,
                    "images": valid_images,
                    "videos": valid_videos
                })

            except Exception as api_error:
                error_message = str(api_error)
                logging.error(f"API error: {error_message}")
                status.update(label=f"Error: {error_message}", state="error")
                # Chain the cause so the original traceback is not lost.
                raise Exception(f"Response generation error: {error_message}") from api_error

            # Additional image generation if enabled
            if st.session_state.generate_image and full_response:
                with st.spinner("Generating custom image..."):
                    try:
                        ip = extract_image_prompt(full_response, query)
                        img, cap = generate_image(ip)
                        if img:
                            st.subheader("AI-Generated Image")
                            st.image(img, caption=cap)
                    except Exception as img_error:
                        logging.error(f"Image generation error: {str(img_error)}")
                        st.warning("Custom image generation failed. Using web images only.")

            # Download buttons
            if full_response:
                st.subheader("Download This Response")
                c1, c2 = st.columns(2)
                c1.download_button(
                    "Markdown",
                    data=full_response,
                    file_name=f"{query[:30]}.md",
                    mime="text/markdown"
                )
                c2.download_button(
                    "HTML",
                    data=md_to_html(full_response, query[:30]),
                    file_name=f"{query[:30]}.html",
                    mime="text/html"
                )

            # Auto save (best-effort: a failed write is only logged)
            if st.session_state.auto_save and st.session_state.messages:
                try:
                    fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
                    with open(fn, "w", encoding="utf-8") as fp:
                        json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
                except Exception as save_err:
                    logging.error(f"Auto-save failed: {save_err}")

        except Exception as e:
            error_message = str(e)
            placeholder.error(f"An error occurred: {error_message}")
            logging.error(f"Process input error: {error_message}")
            ans = f"An error occurred while processing your request: {error_message}"
            st.session_state.messages.append({"role": "assistant", "content": ans})
            # Failures outside the API call used to leave the status widget
            # spinning forever; mark it failed while keeping the label of the
            # step that was running.
            if status is not None:
                status.update(state="error")
1112
+
1113
+
1114
+ # ──────────────────────────────── main ────────────────────────────────────
1115
def main():
    """Script entry point: run the app by calling ``perplexity_app``."""
    perplexity_app()
1117
+
1118
# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()