openfree committed on
Commit
6ed5f50
·
verified ·
1 Parent(s): 225994f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +257 -272
app.py CHANGED
@@ -2,37 +2,25 @@ import gradio as gr
2
  import os
3
  from typing import List, Dict, Any, Optional
4
  import hashlib
5
- import json
6
  from datetime import datetime
7
- import tempfile
8
 
9
- # PDF 처리 라이브러리 (설치 필요한 경우를 위한 대체 구현 포함)
10
  try:
11
  import fitz # PyMuPDF
12
  PDF_AVAILABLE = True
13
  except ImportError:
14
  PDF_AVAILABLE = False
15
- print("PyMuPDF not installed. Install with: pip install pymupdf")
16
-
17
- try:
18
- import chromadb
19
- from chromadb.utils import embedding_functions
20
- CHROMA_AVAILABLE = True
21
- except ImportError:
22
- CHROMA_AVAILABLE = False
23
- print("ChromaDB not installed. Install with: pip install chromadb")
24
 
25
  try:
26
  from sentence_transformers import SentenceTransformer
27
  ST_AVAILABLE = True
28
  except ImportError:
29
  ST_AVAILABLE = False
30
- print("Sentence Transformers not installed. Install with: pip install sentence-transformers")
31
 
32
- import numpy as np
33
- from typing import Tuple
34
-
35
- # Custom CSS (기존 CSS + 추가 스타일)
36
  custom_css = """
37
  .gradio-container {
38
  background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #4facfe 75%, #00f2fe 100%);
@@ -79,81 +67,80 @@ custom_css = """
79
  border: 1px solid rgba(248, 113, 113, 0.5);
80
  color: #ef4444;
81
  }
82
- .pdf-processing {
83
- background-color: rgba(251, 191, 36, 0.2);
84
- border: 1px solid rgba(251, 191, 36, 0.5);
85
- color: #f59e0b;
 
 
 
 
 
 
 
86
  }
87
  """
88
 
89
  class SimpleTextSplitter:
90
- """๊ฐ„๋‹จํ•œ ํ…์ŠคํŠธ ๋ถ„ํ• ๊ธฐ"""
91
- def __init__(self, chunk_size=1000, chunk_overlap=200):
92
  self.chunk_size = chunk_size
93
  self.chunk_overlap = chunk_overlap
94
 
95
  def split_text(self, text: str) -> List[str]:
96
  """ํ…์ŠคํŠธ๋ฅผ ์ฒญํฌ๋กœ ๋ถ„ํ• """
97
  chunks = []
98
- start = 0
99
- text_len = len(text)
100
 
101
- while start < text_len:
102
- end = start + self.chunk_size
103
-
104
- # ๋ฌธ์žฅ ๋์—์„œ ์ž๋ฅด๊ธฐ ์œ„ํ•ด ๋งˆ์นจํ‘œ ์ฐพ๊ธฐ
105
- if end < text_len:
106
- last_period = text.rfind('.', start, end)
107
- if last_period != -1 and last_period > start:
108
- end = last_period + 1
109
-
110
- chunk = text[start:end].strip()
111
- if chunk:
112
- chunks.append(chunk)
113
-
114
- start = end - self.chunk_overlap
115
- if start < 0:
116
- start = 0
117
 
118
  return chunks
119
 
120
- class SimplePDFRAGSystem:
121
- """๊ฐ„๋‹จํ•œ PDF ๊ธฐ๋ฐ˜ RAG ์‹œ์Šคํ…œ"""
122
 
123
  def __init__(self):
124
  self.documents = {}
125
  self.document_chunks = {}
126
  self.embeddings_store = {}
127
- self.text_splitter = SimpleTextSplitter(chunk_size=1000, chunk_overlap=200)
128
 
129
- # 임베딩 모델 초기화 (가능한 경우)
130
  self.embedder = None
131
  if ST_AVAILABLE:
132
  try:
133
  self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
134
- print("Embedding model loaded successfully")
135
  except Exception as e:
136
- print(f"Failed to load embedding model: {e}")
137
 
138
  def extract_text_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
139
  """PDF์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ"""
140
  if not PDF_AVAILABLE:
141
- # PyMuPDF๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ ๋Œ€์ฒด ๋ฐฉ๋ฒ•
142
  return {
143
  "metadata": {
144
  "title": "PDF Reader Not Available",
145
  "file_name": os.path.basename(pdf_path),
146
  "pages": 0
147
  },
148
- "full_text": "PDF ์ฒ˜๋ฆฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. 'pip install pymupdf'๋ฅผ ์‹คํ–‰ํ•ด์ฃผ์„ธ์š”."
149
  }
150
 
151
  try:
152
  doc = fitz.open(pdf_path)
153
  text_content = []
154
  metadata = {
155
- "title": doc.metadata.get("title", "Untitled"),
156
- "author": doc.metadata.get("author", "Unknown"),
157
  "pages": len(doc),
158
  "file_name": os.path.basename(pdf_path)
159
  }
@@ -184,7 +171,7 @@ class SimplePDFRAGSystem:
184
  # ์ฒญํฌ ์ €์žฅ
185
  self.document_chunks[doc_id] = chunks
186
 
187
- # ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ (๊ฐ€๋Šฅํ•œ ๊ฒฝ์šฐ)
188
  if self.embedder:
189
  embeddings = self.embedder.encode(chunks)
190
  self.embeddings_store[doc_id] = embeddings
@@ -193,8 +180,7 @@ class SimplePDFRAGSystem:
193
  self.documents[doc_id] = {
194
  "metadata": pdf_data["metadata"],
195
  "chunk_count": len(chunks),
196
- "upload_time": datetime.now().isoformat(),
197
- "full_text": pdf_data["full_text"][:500] # ์ฒ˜์Œ 500์ž ์ €์žฅ
198
  }
199
 
200
  return {
@@ -206,13 +192,10 @@ class SimplePDFRAGSystem:
206
  }
207
 
208
  except Exception as e:
209
- return {
210
- "success": False,
211
- "error": str(e)
212
- }
213
 
214
- def search_relevant_chunks(self, query: str, doc_ids: List[str], top_k: int = 5) -> List[Dict]:
215
- """์ฟผ๋ฆฌ์™€ ๊ด€๋ จ๋œ ์ฒญํฌ ๊ฒ€์ƒ‰"""
216
  all_relevant_chunks = []
217
 
218
  if self.embedder and self.embeddings_store:
@@ -230,79 +213,75 @@ class SimplePDFRAGSystem:
230
  sim = np.dot(query_embedding, emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(emb))
231
  similarities.append(sim)
232
 
233
- # 상위 k개 선택
234
  top_indices = np.argsort(similarities)[-top_k:][::-1]
235
 
236
  for idx in top_indices:
237
- all_relevant_chunks.append({
238
- "content": chunks[idx],
239
- "doc_id": doc_id,
240
- "doc_name": self.documents[doc_id]["metadata"]["file_name"],
241
- "chunk_index": idx,
242
- "similarity": similarities[idx]
243
- })
244
  else:
245
- # 임베딩이 없는 경우 키워드 기반 간단한 검색
246
- query_lower = query.lower()
247
- query_words = set(query_lower.split())
248
 
249
  for doc_id in doc_ids:
250
  if doc_id in self.document_chunks:
251
  chunks = self.document_chunks[doc_id]
252
- for idx, chunk in enumerate(chunks):
253
  chunk_lower = chunk.lower()
254
- # 쿼리 단어가 청크에 포함되어 있는지 확인
255
- matching_words = sum(1 for word in query_words if word in chunk_lower)
256
- if matching_words > 0:
257
  all_relevant_chunks.append({
258
- "content": chunk,
259
- "doc_id": doc_id,
260
  "doc_name": self.documents[doc_id]["metadata"]["file_name"],
261
- "chunk_index": idx,
262
- "similarity": matching_words / len(query_words)
263
  })
264
 
265
- # 유사도 순으로 정렬하고 상위 k개 반환
266
  all_relevant_chunks.sort(key=lambda x: x.get('similarity', 0), reverse=True)
267
  return all_relevant_chunks[:top_k]
268
 
269
- def generate_context_prompt(self, query: str, chunks: List[Dict]) -> str:
270
- """์ปจํ…์ŠคํŠธ๋ฅผ ํฌํ•จํ•œ ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ"""
271
- if not chunks:
 
 
272
  return query
273
 
274
- context_parts = []
275
- for i, chunk in enumerate(chunks, 1):
276
- context_parts.append(
277
- f"[๋ฌธ์„œ: {chunk['doc_name']}, ์„น์…˜ {chunk['chunk_index']+1}]\n{chunk['content']}\n"
278
- )
279
 
280
- context = "\n---\n".join(context_parts)
 
 
 
 
281
 
282
- enhanced_prompt = f"""๋‹ค์Œ ๋ฌธ์„œ ๋‚ด์šฉ์„ ์ฐธ๊ณ ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”.
283
-
284
- ## ์ฐธ๊ณ  ๋ฌธ์„œ:
285
- {context}
286
-
287
- ## ์งˆ๋ฌธ:
288
- {query}
289
-
290
- ## ๋‹ต๋ณ€:
291
- ์œ„ ๋ฌธ์„œ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ์งˆ๋ฌธ์— ๋Œ€ํ•ด ์ƒ์„ธํ•˜๊ณ  ์ •ํ™•ํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค."""
292
 
293
- return enhanced_prompt
294
 
295
  # RAG 시스템 인스턴스 생성
296
- rag_system = SimplePDFRAGSystem()
297
 
298
- # State variables
299
  current_model = gr.State("openai/gpt-oss-120b")
300
- rag_enabled = gr.State(False)
301
 
302
  def upload_pdf(file):
303
  """PDF ํŒŒ์ผ ์—…๋กœ๋“œ ์ฒ˜๋ฆฌ"""
304
  if file is None:
305
- return gr.update(value="ํŒŒ์ผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”"), gr.update(choices=[]), gr.update(value=False)
 
 
 
 
306
 
307
  try:
308
  # ํŒŒ์ผ ํ•ด์‹œ๋ฅผ ID๋กœ ์‚ฌ์šฉ
@@ -318,175 +297,145 @@ def upload_pdf(file):
318
  status_html = f"""
319
  <div class="pdf-status pdf-success">
320
  โœ… PDF ์—…๋กœ๋“œ ์„ฑ๊ณต!<br>
321
- ๐Ÿ“„ ์ œ๋ชฉ: {result.get('title', 'Unknown')}<br>
322
  ๐Ÿ“‘ ํŽ˜์ด์ง€: {result['pages']}ํŽ˜์ด์ง€<br>
323
- ๐Ÿ” ์ƒ์„ฑ๋œ ์ฒญํฌ: {result['chunks']}๊ฐœ<br>
324
- ๐Ÿ†” ๋ฌธ์„œ ID: {doc_id}
325
  </div>
326
  """
327
 
328
  # 문서 목록 업데이트
329
- doc_list = list(rag_system.documents.keys())
330
  doc_choices = [f"{doc_id}: {rag_system.documents[doc_id]['metadata']['file_name']}"
331
- for doc_id in doc_list]
332
 
333
- return status_html, gr.update(choices=doc_choices, value=doc_choices), gr.update(value=True)
 
 
 
 
334
  else:
335
  status_html = f"""
336
  <div class="pdf-status pdf-error">
337
- โŒ PDF ์—…๋กœ๋“œ ์‹คํŒจ<br>
338
- ์˜ค๋ฅ˜: {result['error']}
339
  </div>
340
  """
341
- return status_html, gr.update(choices=[]), gr.update(value=False)
342
 
343
  except Exception as e:
344
- status_html = f"""
345
- <div class="pdf-status pdf-error">
346
- โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}
347
- </div>
348
- """
349
- return status_html, gr.update(choices=[]), gr.update(value=False)
350
 
351
  def clear_documents():
352
- """์—…๋กœ๋“œ๋œ ๋ฌธ์„œ ์ดˆ๊ธฐํ™”"""
353
- try:
354
- rag_system.documents = {}
355
- rag_system.document_chunks = {}
356
- rag_system.embeddings_store = {}
357
-
358
- return gr.update(value="<div class='pdf-status pdf-success'>โœ… ๋ชจ๋“  ๋ฌธ์„œ๊ฐ€ ์‚ญ์ œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค</div>"), gr.update(choices=[], value=[]), gr.update(value=False)
359
- except Exception as e:
360
- return gr.update(value=f"<div class='pdf-status pdf-error'>โŒ ์‚ญ์ œ ์‹คํŒจ: {str(e)}</div>"), gr.update(), gr.update()
 
361
 
362
  def switch_model(model_choice):
363
- """๋ชจ๋ธ ์ „ํ™˜ ํ•จ์ˆ˜"""
364
- if model_choice == "openai/gpt-oss-120b":
365
- return gr.update(visible=True), gr.update(visible=False), model_choice
366
- else:
367
- return gr.update(visible=False), gr.update(visible=True), model_choice
368
 
369
- def chat_with_model(message: str, history: List[Tuple[str, str]], enable_rag: bool, selected_docs: List[str], top_k: int, model: str):
370
- """๋ชจ๋ธ๊ณผ ๋Œ€ํ™” (RAG ํฌํ•จ)"""
 
 
371
 
372
- # RAG๊ฐ€ ํ™œ์„ฑํ™”๋˜๊ณ  ๋ฌธ์„œ๊ฐ€ ์„ ํƒ๋œ ๊ฒฝ์šฐ
373
- if enable_rag and selected_docs:
374
- # ์„ ํƒ๋œ ๋ฌธ์„œ ID ์ถ”์ถœ
375
- doc_ids = [doc.split(":")[0] for doc in selected_docs]
376
-
377
- # ๊ด€๋ จ ์ฒญํฌ ๊ฒ€์ƒ‰
378
- relevant_chunks = rag_system.search_relevant_chunks(message, doc_ids, top_k)
379
-
380
- if relevant_chunks:
381
- # ์ปจํ…์ŠคํŠธ๋ฅผ ํฌํ•จํ•œ ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ
382
- enhanced_message = rag_system.generate_context_prompt(message, relevant_chunks)
383
-
384
- # ๋””๋ฒ„๊ทธ ์ •๋ณด ํฌํ•จ ์‘๋‹ต (์‹ค์ œ ๊ตฌํ˜„์‹œ ๋ชจ๋ธ API ํ˜ธ์ถœ๋กœ ๋Œ€์ฒด)
385
- response = f"""๐Ÿ“š RAG ๊ธฐ๋ฐ˜ ๋‹ต๋ณ€ (๋ชจ๋ธ: {model})
386
-
387
- ์ฐพ์€ ๊ด€๋ จ ๋ฌธ์„œ ์„น์…˜: {len(relevant_chunks)}๊ฐœ
388
-
389
- ์งˆ๋ฌธ: {message}
390
-
391
- ๋‹ต๋ณ€:
392
- {enhanced_message[:2000]}...
393
-
394
- [์ฐธ๊ณ : ์‹ค์ œ ๊ตฌํ˜„์‹œ ์—ฌ๊ธฐ์„œ ๋ชจ๋ธ API๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ enhanced_message๋ฅผ ์ „์†กํ•˜๊ณ  ์‘๋‹ต์„ ๋ฐ›์•„์•ผ ํ•ฉ๋‹ˆ๋‹ค]
395
-
396
- ๊ด€๋ จ ๋ฌธ์„œ ์„น์…˜ ์š”์•ฝ:
397
- """
398
- for i, chunk in enumerate(relevant_chunks[:3], 1):
399
- response += f"\n{i}. {chunk['doc_name']} - ์„น์…˜ {chunk['chunk_index']+1} (์œ ์‚ฌ๋„: {chunk['similarity']:.2f})"
400
- response += f"\n ๋‚ด์šฉ: {chunk['content'][:200]}...\n"
401
- else:
402
- response = f"โš ๏ธ ์„ ํƒ๋œ ๋ฌธ์„œ์—์„œ '{message}'์™€ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์งˆ๋ฌธ์„ ์‹œ๋„ํ•ด๋ณด์„ธ์š”."
403
- else:
404
- # RAG ๋น„ํ™œ์„ฑํ™” ์ƒํƒœ
405
- response = f"""์ผ๋ฐ˜ ๋‹ต๋ณ€ ๋ชจ๋“œ (๋ชจ๋ธ: {model})
406
-
407
- ์งˆ๋ฌธ: {message}
408
-
409
- [์ฐธ๊ณ : ์‹ค์ œ ๊ตฌํ˜„์‹œ ์—ฌ๊ธฐ์„œ ๋ชจ๋ธ API๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ message๋ฅผ ์ „์†กํ•˜๊ณ  ์‘๋‹ต์„ ๋ฐ›์•„์•ผ ํ•ฉ๋‹ˆ๋‹ค]
410
-
411
- PDF ๋ฌธ์„œ๋ฅผ ์—…๋กœ๋“œํ•˜๊ณ  RAG๋ฅผ ํ™œ์„ฑํ™”ํ•˜๋ฉด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ๋‹ต๋ณ€์„ ๋ฐ›์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."""
412
 
413
- history.append((message, response))
414
- return "", history
 
 
 
 
 
415
 
416
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค
417
  with gr.Blocks(fill_height=True, theme="Nymbo/Nymbo_Theme", css=custom_css) as demo:
 
 
 
 
 
 
 
 
 
 
418
  with gr.Row():
419
- # ์‚ฌ์ด๋“œ๋ฐ”
420
  with gr.Column(scale=1):
421
  with gr.Group(elem_classes="main-container"):
422
- gr.Markdown("# ๐Ÿš€ AI Chat with RAG")
423
  gr.Markdown(
424
- "PDF ๋ฌธ์„œ๋ฅผ ์—…๋กœ๋“œํ•˜์—ฌ AI๊ฐ€ ๋ฌธ์„œ ๋‚ด์šฉ์„ ์ฐธ๊ณ ํ•ด ๋‹ต๋ณ€ํ•˜๋„๋ก ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
 
425
  )
426
 
427
- # ๋ชจ๋ธ ์„ ํƒ
428
  model_dropdown = gr.Dropdown(
429
  choices=["openai/gpt-oss-120b", "openai/gpt-oss-20b"],
430
  value="openai/gpt-oss-120b",
431
- label="๐Ÿ“Š ๋ชจ๋ธ ์„ ํƒ"
 
432
  )
433
 
 
434
  login_button = gr.LoginButton("Sign in with Hugging Face", size="lg")
435
- reload_btn = gr.Button("๐Ÿ”„ ๋ชจ๋ธ ๋ณ€๊ฒฝ ์ ์šฉ", variant="primary", size="lg")
436
 
437
- # RAG ์„ค์ •
438
- with gr.Accordion("๐Ÿ“š PDF RAG ์„ค์ •", open=True):
 
 
 
439
  pdf_upload = gr.File(
440
- label="PDF ์—…๋กœ๋“œ",
441
  file_types=[".pdf"],
442
  type="filepath"
443
  )
444
 
445
  upload_status = gr.HTML(
446
- value="<div class='pdf-status'>PDF๋ฅผ ์—…๋กœ๋“œํ•˜์—ฌ RAG๋ฅผ ํ™œ์„ฑํ™”ํ•˜์„ธ์š”</div>"
447
  )
448
 
449
  document_list = gr.CheckboxGroup(
450
  choices=[],
451
  label="๐Ÿ“„ ์—…๋กœ๋“œ๋œ ๋ฌธ์„œ",
452
- info="์งˆ๋ฌธ์— ์ฐธ๊ณ ํ•  ๋ฌธ์„œ๋ฅผ ์„ ํƒํ•˜์„ธ์š”"
453
  )
454
 
455
- with gr.Row():
456
- clear_btn = gr.Button("๐Ÿ—‘๏ธ ๋ชจ๋“  ๋ฌธ์„œ ์‚ญ์ œ", size="sm")
457
- refresh_btn = gr.Button("๐Ÿ”„ ๋ชฉ๋ก ์ƒˆ๋กœ๊ณ ์นจ", size="sm")
458
 
459
  enable_rag = gr.Checkbox(
460
  label="RAG ํ™œ์„ฑํ™”",
461
  value=False,
462
- info="๋ฌธ์„œ ๊ธฐ๋ฐ˜ ๋‹ต๋ณ€ ์ƒ์„ฑ ํ™œ์„ฑํ™”"
463
  )
464
 
465
- with gr.Accordion("โš™๏ธ RAG ๊ณ ๊ธ‰ ์„ค์ •", open=False):
466
- top_k_chunks = gr.Slider(
467
- minimum=1,
468
- maximum=10,
469
- value=5,
470
- step=1,
471
- label="์ฐธ์กฐํ•  ์ฒญํฌ ์ˆ˜",
472
- info="๋‹ต๋ณ€ ์ƒ์„ฑ์‹œ ์ฐธ๊ณ ํ•  ๋ฌธ์„œ ์ฒญํฌ์˜ ๊ฐœ์ˆ˜"
473
- )
474
-
475
- gr.Markdown("""
476
- ### ๐Ÿ“ RAG ์‚ฌ์šฉ ํŒ:
477
- 1. PDF ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”
478
- 2. ์—…๋กœ๋“œ๋œ ๋ฌธ์„œ๋ฅผ ์„ ํƒํ•˜์„ธ์š”
479
- 3. RAG๋ฅผ ํ™œ์„ฑํ™”ํ•˜์„ธ์š”
480
- 4. ๋ฌธ์„œ ๋‚ด์šฉ์— ๋Œ€ํ•ด ์งˆ๋ฌธํ•˜์„ธ์š”
481
-
482
- ์˜ˆ์‹œ ์งˆ๋ฌธ:
483
- - "๋ฌธ์„œ์˜ ์ฃผ์š” ๋‚ด์šฉ์„ ์š”์•ฝํ•ด์ฃผ์„ธ์š”"
484
- - "์ด ๋ฌธ์„œ์—์„œ ์–ธ๊ธ‰๋œ ๋‚ ์งœ๋Š” ์–ธ์ œ์ธ๊ฐ€์š”?"
485
- - "์ฐธ๊ฐ€ ์ž๊ฒฉ ์กฐ๊ฑด์€ ๋ฌด์—‡์ธ๊ฐ€์š”?"
486
- """)
487
 
488
- # ๊ณ ๊ธ‰ ์˜ต์…˜
489
- with gr.Accordion("โš™๏ธ ๋ชจ๋ธ ์„ค์ •", open=False):
 
490
  temperature = gr.Slider(
491
  minimum=0,
492
  maximum=2,
@@ -502,52 +451,75 @@ with gr.Blocks(fill_height=True, theme="Nymbo/Nymbo_Theme", css=custom_css) as d
502
  label="Max Tokens"
503
  )
504
 
505
- # ๋ฉ”์ธ ์ฑ„ํŒ… ์˜์—ญ
506
  with gr.Column(scale=3):
507
  with gr.Group(elem_classes="main-container"):
508
  gr.Markdown("## ๐Ÿ’ฌ Chat Interface")
509
 
510
  # RAG 상태 표시
511
- with gr.Row():
512
- rag_status = gr.HTML(
513
- value="<div style='padding: 10px; background: rgba(59, 130, 246, 0.1); border-radius: 8px; margin-bottom: 10px;'>๐Ÿ” RAG: <strong>๋น„ํ™œ์„ฑํ™”</strong></div>"
514
- )
515
 
516
- # ๋ชจ๋ธ ์ธํ„ฐํŽ˜์ด์Šค ์ปจํ…Œ์ด๋„ˆ
 
 
 
517
  with gr.Column(visible=True) as model_120b_container:
518
  gr.Markdown("### Model: openai/gpt-oss-120b")
519
- chatbot_120b = gr.Chatbot(height=400)
520
- msg_box_120b = gr.Textbox(
521
- label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ",
522
- placeholder="PDF ๋‚ด์šฉ์— ๋Œ€ํ•ด ์งˆ๋ฌธํ•ด๋ณด์„ธ์š”...",
523
- lines=2
524
- )
525
- with gr.Row():
526
- send_btn_120b = gr.Button("๐Ÿ“ค ์ „์†ก", variant="primary")
527
- clear_btn_120b = gr.Button("๐Ÿ—‘๏ธ ๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
 
 
 
 
 
 
 
 
 
 
 
528
 
529
  with gr.Column(visible=False) as model_20b_container:
530
  gr.Markdown("### Model: openai/gpt-oss-20b")
531
- chatbot_20b = gr.Chatbot(height=400)
532
- msg_box_20b = gr.Textbox(
533
- label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ",
534
- placeholder="PDF ๋‚ด์šฉ์— ๋Œ€ํ•ด ์งˆ๋ฌธํ•ด๋ณด์„ธ์š”...",
535
- lines=2
536
- )
537
- with gr.Row():
538
- send_btn_20b = gr.Button("๐Ÿ“ค ์ „์†ก", variant="primary")
539
- clear_btn_20b = gr.Button("๐Ÿ—‘๏ธ ๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
 
 
 
 
 
 
 
 
 
 
540
 
541
- # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
542
 
543
- # PDF ์—…๋กœ๋“œ ์ฒ˜๋ฆฌ
544
  pdf_upload.upload(
545
  fn=upload_pdf,
546
  inputs=[pdf_upload],
547
  outputs=[upload_status, document_list, enable_rag]
548
  )
549
 
550
- # ๋ฌธ์„œ ์ดˆ๊ธฐํ™”
551
  clear_btn.click(
552
  fn=clear_documents,
553
  outputs=[upload_status, document_list, enable_rag]
@@ -556,7 +528,7 @@ with gr.Blocks(fill_height=True, theme="Nymbo/Nymbo_Theme", css=custom_css) as d
556
  # RAG 상태 업데이트
557
  enable_rag.change(
558
  fn=lambda x: gr.update(
559
- value=f"<div style='padding: 10px; background: rgba(59, 130, 246, 0.1); border-radius: 8px; margin-bottom: 10px;'>๐Ÿ” RAG: <strong>{'ํ™œ์„ฑํ™”' if x else '๋น„ํ™œ์„ฑํ™”'}</strong></div>"
560
  ),
561
  inputs=[enable_rag],
562
  outputs=[rag_status]
@@ -568,46 +540,59 @@ with gr.Blocks(fill_height=True, theme="Nymbo/Nymbo_Theme", css=custom_css) as d
568
  inputs=[model_dropdown],
569
  outputs=[model_120b_container, model_20b_container, current_model]
570
  ).then(
571
- fn=lambda: gr.Info("๋ชจ๋ธ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์ „ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค!"),
572
  inputs=[],
573
  outputs=[]
574
  )
575
 
576
- # 120b ๋ชจ๋ธ ์ฑ„ํŒ…
577
- msg_box_120b.submit(
578
- fn=lambda msg, hist, rag, docs, k: chat_with_model(msg, hist, rag, docs, k, "openai/gpt-oss-120b"),
579
- inputs=[msg_box_120b, chatbot_120b, enable_rag, document_list, top_k_chunks],
580
- outputs=[msg_box_120b, chatbot_120b]
581
- )
582
 
583
- send_btn_120b.click(
584
- fn=lambda msg, hist, rag, docs, k: chat_with_model(msg, hist, rag, docs, k, "openai/gpt-oss-120b"),
585
- inputs=[msg_box_120b, chatbot_120b, enable_rag, document_list, top_k_chunks],
586
- outputs=[msg_box_120b, chatbot_120b]
587
  )
588
 
589
- clear_btn_120b.click(
590
- lambda: ([], ""),
591
- outputs=[chatbot_120b, msg_box_120b]
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  )
593
 
594
- # 20b ๋ชจ๋ธ ์ฑ„ํŒ…
595
- msg_box_20b.submit(
596
- fn=lambda msg, hist, rag, docs, k: chat_with_model(msg, hist, rag, docs, k, "openai/gpt-oss-20b"),
597
- inputs=[msg_box_20b, chatbot_20b, enable_rag, document_list, top_k_chunks],
598
- outputs=[msg_box_20b, chatbot_20b]
599
  )
600
 
 
601
  send_btn_20b.click(
602
- fn=lambda msg, hist, rag, docs, k: chat_with_model(msg, hist, rag, docs, k, "openai/gpt-oss-20b"),
603
- inputs=[msg_box_20b, chatbot_20b, enable_rag, document_list, top_k_chunks],
604
- outputs=[msg_box_20b, chatbot_20b]
605
  )
606
 
607
- clear_btn_20b.click(
608
- lambda: ([], ""),
609
- outputs=[chatbot_20b, msg_box_20b]
 
610
  )
611
 
612
- if __name__ == "__main__":
613
- demo.launch()
 
2
  import os
3
  from typing import List, Dict, Any, Optional
4
  import hashlib
 
5
  from datetime import datetime
6
+ import numpy as np
7
 
8
+ # PDF 처리 라이브러리
9
  try:
10
  import fitz # PyMuPDF
11
  PDF_AVAILABLE = True
12
  except ImportError:
13
  PDF_AVAILABLE = False
14
+ print("โš ๏ธ PyMuPDF not installed. Install with: pip install pymupdf")
 
 
 
 
 
 
 
 
15
 
16
  try:
17
  from sentence_transformers import SentenceTransformer
18
  ST_AVAILABLE = True
19
  except ImportError:
20
  ST_AVAILABLE = False
21
+ print("โš ๏ธ Sentence Transformers not installed. Install with: pip install sentence-transformers")
22
 
23
+ # Custom CSS for gradient background and styling
 
 
 
24
  custom_css = """
25
  .gradio-container {
26
  background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #4facfe 75%, #00f2fe 100%);
 
67
  border: 1px solid rgba(248, 113, 113, 0.5);
68
  color: #ef4444;
69
  }
70
+ .pdf-info {
71
+ background-color: rgba(59, 130, 246, 0.2);
72
+ border: 1px solid rgba(59, 130, 246, 0.5);
73
+ color: #3b82f6;
74
+ }
75
+ .rag-context {
76
+ background-color: rgba(251, 191, 36, 0.1);
77
+ border-left: 4px solid #f59e0b;
78
+ padding: 10px;
79
+ margin: 10px 0;
80
+ border-radius: 5px;
81
  }
82
  """
83
 
84
  class SimpleTextSplitter:
85
+ """ํ…์ŠคํŠธ ๋ถ„ํ• ๊ธฐ"""
86
+ def __init__(self, chunk_size=800, chunk_overlap=100):
87
  self.chunk_size = chunk_size
88
  self.chunk_overlap = chunk_overlap
89
 
90
  def split_text(self, text: str) -> List[str]:
91
  """ํ…์ŠคํŠธ๋ฅผ ์ฒญํฌ๋กœ ๋ถ„ํ• """
92
  chunks = []
93
+ sentences = text.split('. ')
94
+ current_chunk = ""
95
 
96
+ for sentence in sentences:
97
+ if len(current_chunk) + len(sentence) < self.chunk_size:
98
+ current_chunk += sentence + ". "
99
+ else:
100
+ if current_chunk:
101
+ chunks.append(current_chunk.strip())
102
+ current_chunk = sentence + ". "
103
+
104
+ if current_chunk:
105
+ chunks.append(current_chunk.strip())
 
 
 
 
 
 
106
 
107
  return chunks
108
 
109
+ class PDFRAGSystem:
110
+ """PDF ๊ธฐ๋ฐ˜ RAG ์‹œ์Šคํ…œ"""
111
 
112
  def __init__(self):
113
  self.documents = {}
114
  self.document_chunks = {}
115
  self.embeddings_store = {}
116
+ self.text_splitter = SimpleTextSplitter(chunk_size=800, chunk_overlap=100)
117
 
118
+ # 임베딩 모델 초기화
119
  self.embedder = None
120
  if ST_AVAILABLE:
121
  try:
122
  self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
123
+ print("โœ… ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต")
124
  except Exception as e:
125
+ print(f"โš ๏ธ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
126
 
127
  def extract_text_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
128
  """PDF์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ"""
129
  if not PDF_AVAILABLE:
 
130
  return {
131
  "metadata": {
132
  "title": "PDF Reader Not Available",
133
  "file_name": os.path.basename(pdf_path),
134
  "pages": 0
135
  },
136
+ "full_text": "PDF ์ฒ˜๋ฆฌ๋ฅผ ์œ„ํ•ด 'pip install pymupdf'๋ฅผ ์‹คํ–‰ํ•ด์ฃผ์„ธ์š”."
137
  }
138
 
139
  try:
140
  doc = fitz.open(pdf_path)
141
  text_content = []
142
  metadata = {
143
+ "title": doc.metadata.get("title", os.path.basename(pdf_path)),
 
144
  "pages": len(doc),
145
  "file_name": os.path.basename(pdf_path)
146
  }
 
171
  # ์ฒญํฌ ์ €์žฅ
172
  self.document_chunks[doc_id] = chunks
173
 
174
+ # 임베딩 생성
175
  if self.embedder:
176
  embeddings = self.embedder.encode(chunks)
177
  self.embeddings_store[doc_id] = embeddings
 
180
  self.documents[doc_id] = {
181
  "metadata": pdf_data["metadata"],
182
  "chunk_count": len(chunks),
183
+ "upload_time": datetime.now().isoformat()
 
184
  }
185
 
186
  return {
 
192
  }
193
 
194
  except Exception as e:
195
+ return {"success": False, "error": str(e)}
 
 
 
196
 
197
+ def search_relevant_chunks(self, query: str, doc_ids: List[str], top_k: int = 3) -> List[Dict]:
198
+ """๊ด€๋ จ ์ฒญํฌ ๊ฒ€์ƒ‰"""
199
  all_relevant_chunks = []
200
 
201
  if self.embedder and self.embeddings_store:
 
213
  sim = np.dot(query_embedding, emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(emb))
214
  similarities.append(sim)
215
 
216
+ # 상위 청크 선택
217
  top_indices = np.argsort(similarities)[-top_k:][::-1]
218
 
219
  for idx in top_indices:
220
+ if similarities[idx] > 0.2:
221
+ all_relevant_chunks.append({
222
+ "content": chunks[idx],
223
+ "doc_name": self.documents[doc_id]["metadata"]["file_name"],
224
+ "similarity": similarities[idx]
225
+ })
 
226
  else:
227
+ # 키워드 기반 검색
228
+ query_keywords = set(query.lower().split())
 
229
 
230
  for doc_id in doc_ids:
231
  if doc_id in self.document_chunks:
232
  chunks = self.document_chunks[doc_id]
233
+ for chunk in chunks[:top_k]: # ์ฒ˜์Œ ๋ช‡ ๊ฐœ๋งŒ ์‚ฌ์šฉ
234
  chunk_lower = chunk.lower()
235
+ score = sum(1 for keyword in query_keywords if keyword in chunk_lower)
236
+ if score > 0:
 
237
  all_relevant_chunks.append({
238
+ "content": chunk[:500], # ๊ธธ์ด ์ œํ•œ
 
239
  "doc_name": self.documents[doc_id]["metadata"]["file_name"],
240
+ "similarity": score / len(query_keywords) if query_keywords else 0
 
241
  })
242
 
243
+ # 정렬 및 반환
244
  all_relevant_chunks.sort(key=lambda x: x.get('similarity', 0), reverse=True)
245
  return all_relevant_chunks[:top_k]
246
 
247
+ def create_rag_prompt(self, query: str, doc_ids: List[str], top_k: int = 3) -> str:
248
+ """RAG ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ"""
249
+ relevant_chunks = self.search_relevant_chunks(query, doc_ids, top_k)
250
+
251
+ if not relevant_chunks:
252
  return query
253
 
254
+ # 프롬프트 구성
255
+ prompt_parts = []
256
+ prompt_parts.append("๋‹ค์Œ ๋ฌธ์„œ ๋‚ด์šฉ์„ ์ฐธ๊ณ ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”:\n")
257
+ prompt_parts.append("=" * 50)
 
258
 
259
+ for i, chunk in enumerate(relevant_chunks, 1):
260
+ prompt_parts.append(f"\n[์ฐธ๊ณ ๋ฌธ์„œ {i} - {chunk['doc_name']}]")
261
+ content = chunk['content'][:400] if len(chunk['content']) > 400 else chunk['content']
262
+ prompt_parts.append(content)
263
+ prompt_parts.append("")
264
 
265
+ prompt_parts.append("=" * 50)
266
+ prompt_parts.append(f"\n์งˆ๋ฌธ: {query}")
267
+ prompt_parts.append("\n์œ„ ์ฐธ๊ณ ๋ฌธ์„œ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์ž์„ธํ•˜๊ณ  ์ •ํ™•ํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”:")
 
 
 
 
 
 
 
268
 
269
+ return "\n".join(prompt_parts)
270
 
271
  # RAG 시스템 인스턴스 생성
272
+ rag_system = PDFRAGSystem()
273
 
274
+ # State variable to track current model
275
  current_model = gr.State("openai/gpt-oss-120b")
 
276
 
277
  def upload_pdf(file):
278
  """PDF ํŒŒ์ผ ์—…๋กœ๋“œ ์ฒ˜๋ฆฌ"""
279
  if file is None:
280
+ return (
281
+ gr.update(value="<div class='pdf-status pdf-error'>ํŒŒ์ผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”</div>"),
282
+ gr.update(choices=[]),
283
+ gr.update(value=False)
284
+ )
285
 
286
  try:
287
  # ํŒŒ์ผ ํ•ด์‹œ๋ฅผ ID๋กœ ์‚ฌ์šฉ
 
297
  status_html = f"""
298
  <div class="pdf-status pdf-success">
299
  โœ… PDF ์—…๋กœ๋“œ ์„ฑ๊ณต!<br>
300
+ ๐Ÿ“„ ํŒŒ์ผ: {result['title']}<br>
301
  ๐Ÿ“‘ ํŽ˜์ด์ง€: {result['pages']}ํŽ˜์ด์ง€<br>
302
+ ๐Ÿ” ์ฒญํฌ: {result['chunks']}๊ฐœ ์ƒ์„ฑ
 
303
  </div>
304
  """
305
 
306
  # 문서 목록 업데이트
 
307
  doc_choices = [f"{doc_id}: {rag_system.documents[doc_id]['metadata']['file_name']}"
308
+ for doc_id in rag_system.documents.keys()]
309
 
310
+ return (
311
+ status_html,
312
+ gr.update(choices=doc_choices, value=doc_choices),
313
+ gr.update(value=True)
314
+ )
315
  else:
316
  status_html = f"""
317
  <div class="pdf-status pdf-error">
318
+ โŒ ์—…๋กœ๋“œ ์‹คํŒจ: {result['error']}
 
319
  </div>
320
  """
321
+ return status_html, gr.update(), gr.update(value=False)
322
 
323
  except Exception as e:
324
+ return (
325
+ f"<div class='pdf-status pdf-error'>โŒ ์˜ค๋ฅ˜: {str(e)}</div>",
326
+ gr.update(),
327
+ gr.update(value=False)
328
+ )
 
329
 
330
  def clear_documents():
331
+ """๋ฌธ์„œ ์ดˆ๊ธฐํ™”"""
332
+ rag_system.documents = {}
333
+ rag_system.document_chunks = {}
334
+ rag_system.embeddings_store = {}
335
+
336
+ return (
337
+ gr.update(value="<div class='pdf-status pdf-success'>โœ… ๋ชจ๋“  ๋ฌธ์„œ๊ฐ€ ์‚ญ์ œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค</div>"),
338
+ gr.update(choices=[], value=[]),
339
+ gr.update(value=False)
340
+ )
341
 
342
  def switch_model(model_choice):
343
+ """Function to switch between models"""
344
+ return gr.update(visible=False), gr.update(visible=True), model_choice
 
 
 
345
 
346
+ def create_rag_context_display(query, selected_docs, top_k):
347
+ """RAG ์ปจํ…์ŠคํŠธ ํ‘œ์‹œ์šฉ HTML ์ƒ์„ฑ"""
348
+ if not selected_docs:
349
+ return ""
350
 
351
+ doc_ids = [doc.split(":")[0] for doc in selected_docs]
352
+ chunks = rag_system.search_relevant_chunks(query, doc_ids, top_k)
353
+
354
+ if not chunks:
355
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
+ html = "<div class='rag-context'><strong>๐Ÿ“š ์ฐธ๊ณ  ๋ฌธ์„œ:</strong><br>"
358
+ for i, chunk in enumerate(chunks, 1):
359
+ html += f"<br>{i}. {chunk['doc_name']} (์œ ์‚ฌ๋„: {chunk['similarity']:.2f})<br>"
360
+ html += f"<small>{chunk['content'][:200]}...</small><br>"
361
+ html += "</div>"
362
+
363
+ return html
364
 
365
+ # Main interface
366
  with gr.Blocks(fill_height=True, theme="Nymbo/Nymbo_Theme", css=custom_css) as demo:
367
+ # JavaScript to handle message passing
368
+ gr.HTML("""
369
+ <script>
370
+ function sendToModel(processedMsg) {
371
+ // This function would send the processed message to the model
372
+ console.log("Sending to model:", processedMsg);
373
+ }
374
+ </script>
375
+ """)
376
+
377
  with gr.Row():
378
+ # Sidebar
379
  with gr.Column(scale=1):
380
  with gr.Group(elem_classes="main-container"):
381
+ gr.Markdown("# ๐Ÿš€ Inference Provider + RAG")
382
  gr.Markdown(
383
+ "OpenAI GPT-OSS models with PDF RAG support. "
384
+ "Sign in with your Hugging Face account to use this API."
385
  )
386
 
387
+ # Model selection
388
  model_dropdown = gr.Dropdown(
389
  choices=["openai/gpt-oss-120b", "openai/gpt-oss-20b"],
390
  value="openai/gpt-oss-120b",
391
+ label="๐Ÿ“Š Select Model",
392
+ info="Choose between different model sizes"
393
  )
394
 
395
+ # Login button
396
  login_button = gr.LoginButton("Sign in with Hugging Face", size="lg")
 
397
 
398
+ # Reload button to apply model change
399
+ reload_btn = gr.Button("๐Ÿ”„ Apply Model Change", variant="primary", size="lg")
400
+
401
+ # RAG Settings
402
+ with gr.Accordion("๐Ÿ“š PDF RAG Settings", open=True):
403
  pdf_upload = gr.File(
404
+ label="Upload PDF",
405
  file_types=[".pdf"],
406
  type="filepath"
407
  )
408
 
409
  upload_status = gr.HTML(
410
+ value="<div class='pdf-status pdf-info'>๐Ÿ“ค PDF๋ฅผ ์—…๋กœ๋“œํ•˜์—ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ๋‹ต๋ณ€์„ ๋ฐ›์œผ์„ธ์š”</div>"
411
  )
412
 
413
  document_list = gr.CheckboxGroup(
414
  choices=[],
415
  label="๐Ÿ“„ ์—…๋กœ๋“œ๋œ ๋ฌธ์„œ",
416
+ info="์ฐธ๊ณ ํ•  ๋ฌธ์„œ๋ฅผ ์„ ํƒํ•˜์„ธ์š”"
417
  )
418
 
419
+ clear_btn = gr.Button("๐Ÿ—‘๏ธ ๋ชจ๋“  ๋ฌธ์„œ ์‚ญ์ œ", size="sm")
 
 
420
 
421
  enable_rag = gr.Checkbox(
422
  label="RAG ํ™œ์„ฑํ™”",
423
  value=False,
424
+ info="์„ ํƒํ•œ ๋ฌธ์„œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ๋‹ต๋ณ€ ์ƒ์„ฑ"
425
  )
426
 
427
+ top_k_chunks = gr.Slider(
428
+ minimum=1,
429
+ maximum=5,
430
+ value=3,
431
+ step=1,
432
+ label="์ฐธ์กฐ ์ฒญํฌ ์ˆ˜",
433
+ info="๋‹ต๋ณ€ ์ƒ์„ฑ์‹œ ์ฐธ๊ณ ํ•  ๋ฌธ์„œ ์กฐ๊ฐ ๊ฐœ์ˆ˜"
434
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
 
436
+ # Additional options
437
+ with gr.Accordion("โš™๏ธ Advanced Options", open=False):
438
+ gr.Markdown("*These options will be available after model implementation*")
439
  temperature = gr.Slider(
440
  minimum=0,
441
  maximum=2,
 
451
  label="Max Tokens"
452
  )
453
 
454
+ # Main chat area
455
  with gr.Column(scale=3):
456
  with gr.Group(elem_classes="main-container"):
457
  gr.Markdown("## ๐Ÿ’ฌ Chat Interface")
458
 
459
  # RAG 상태 표시
460
+ rag_status = gr.HTML(
461
+ value="<div class='pdf-status pdf-info'>๐Ÿ” RAG: <strong>๋น„ํ™œ์„ฑํ™”</strong></div>"
462
+ )
 
463
 
464
+ # RAG 컨텍스트 표시 영역
465
+ rag_context_display = gr.HTML(value="", visible=False)
466
+
467
+ # Container for model interfaces
468
  with gr.Column(visible=True) as model_120b_container:
469
  gr.Markdown("### Model: openai/gpt-oss-120b")
470
+
471
+ # RAG 처리를 위한 커스텀 인터페이스
472
+ with gr.Group():
473
+ # 사용자 입력 텍스트박스
474
+ user_input = gr.Textbox(
475
+ label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ",
476
+ placeholder="๋ฌธ์„œ์— ๋Œ€ํ•ด ์งˆ๋ฌธํ•˜๊ฑฐ๋‚˜ ์ผ๋ฐ˜ ๋Œ€ํ™”๋ฅผ ์‹œ์ž‘ํ•˜์„ธ์š”...",
477
+ lines=2
478
+ )
479
+
480
+ with gr.Row():
481
+ send_btn = gr.Button("๐Ÿ“ค ์ „์†ก", variant="primary")
482
+ clear_chat_btn = gr.Button("๐Ÿ—‘๏ธ ๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
483
+
484
+ # 원본 모델 로드
485
+ original_model = gr.load(
486
+ "models/openai/gpt-oss-120b",
487
+ accept_token=login_button,
488
+ provider="fireworks-ai"
489
+ )
490
 
491
  with gr.Column(visible=False) as model_20b_container:
492
  gr.Markdown("### Model: openai/gpt-oss-20b")
493
+
494
+ with gr.Group():
495
+ # 사용자 입력 텍스트박스 (20b용)
496
+ user_input_20b = gr.Textbox(
497
+ label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ",
498
+ placeholder="๋ฌธ์„œ์— ๋Œ€ํ•ด ์งˆ๋ฌธํ•˜๊ฑฐ๋‚˜ ์ผ๋ฐ˜ ๋Œ€ํ™”๋ฅผ ์‹œ์ž‘ํ•˜์„ธ์š”...",
499
+ lines=2
500
+ )
501
+
502
+ with gr.Row():
503
+ send_btn_20b = gr.Button("๐Ÿ“ค ์ „์†ก", variant="primary")
504
+ clear_chat_btn_20b = gr.Button("๐Ÿ—‘๏ธ ๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
505
+
506
+ # 원본 모델 로드
507
+ original_model_20b = gr.load(
508
+ "models/openai/gpt-oss-20b",
509
+ accept_token=login_button,
510
+ provider="fireworks-ai"
511
+ )
512
 
513
+ # Event Handlers
514
 
515
+ # PDF 업로드
516
  pdf_upload.upload(
517
  fn=upload_pdf,
518
  inputs=[pdf_upload],
519
  outputs=[upload_status, document_list, enable_rag]
520
  )
521
 
522
+ # 문서 삭제
523
  clear_btn.click(
524
  fn=clear_documents,
525
  outputs=[upload_status, document_list, enable_rag]
 
528
  # RAG 상태 업데이트
529
  enable_rag.change(
530
  fn=lambda x: gr.update(
531
+ value=f"<div class='pdf-status pdf-info'>๐Ÿ” RAG: <strong>{'ํ™œ์„ฑํ™”' if x else '๋น„ํ™œ์„ฑํ™”'}</strong></div>"
532
  ),
533
  inputs=[enable_rag],
534
  outputs=[rag_status]
 
540
  inputs=[model_dropdown],
541
  outputs=[model_120b_container, model_20b_container, current_model]
542
  ).then(
543
+ fn=lambda: gr.Info("Model switched successfully!"),
544
  inputs=[],
545
  outputs=[]
546
  )
547
 
548
+ # Update visibility based on dropdown selection
549
+ def update_visibility(model_choice):
550
+ if model_choice == "openai/gpt-oss-120b":
551
+ return gr.update(visible=True), gr.update(visible=False)
552
+ else:
553
+ return gr.update(visible=False), gr.update(visible=True)
554
 
555
+ model_dropdown.change(
556
+ fn=update_visibility,
557
+ inputs=[model_dropdown],
558
+ outputs=[model_120b_container, model_20b_container]
559
  )
560
 
561
+ # 메시지 전송 처리 (RAG 포함)
562
+ def process_message(message, enable_rag, selected_docs, top_k):
563
+ """๋ฉ”์‹œ์ง€๋ฅผ RAG๋กœ ์ฒ˜๋ฆฌํ•˜์—ฌ ๋ชจ๋ธ์— ์ „์†ก"""
564
+ if enable_rag and selected_docs:
565
+ doc_ids = [doc.split(":")[0] for doc in selected_docs]
566
+ enhanced_message = rag_system.create_rag_prompt(message, doc_ids, top_k)
567
+ context_html = create_rag_context_display(message, selected_docs, top_k)
568
+ return enhanced_message, gr.update(value=context_html, visible=True)
569
+ else:
570
+ return message, gr.update(value="", visible=False)
571
+
572
+ # 120b 모델용 이벤트
573
+ send_btn.click(
574
+ fn=process_message,
575
+ inputs=[user_input, enable_rag, document_list, top_k_chunks],
576
+ outputs=[user_input, rag_context_display]
577
  )
578
 
579
+ user_input.submit(
580
+ fn=process_message,
581
+ inputs=[user_input, enable_rag, document_list, top_k_chunks],
582
+ outputs=[user_input, rag_context_display]
 
583
  )
584
 
585
+ # 20b 모델용 이벤트
586
  send_btn_20b.click(
587
+ fn=process_message,
588
+ inputs=[user_input_20b, enable_rag, document_list, top_k_chunks],
589
+ outputs=[user_input_20b, rag_context_display]
590
  )
591
 
592
+ user_input_20b.submit(
593
+ fn=process_message,
594
+ inputs=[user_input_20b, enable_rag, document_list, top_k_chunks],
595
+ outputs=[user_input_20b, rag_context_display]
596
  )
597
 
598
+ demo.launch()