gpt-oss-RAG

Running on Zero

App Files Files Community

openfree committed 11 days ago

Commit

4ac1625

verified ·

1 Parent(s): 16c5e93

Update app.py

Browse files

Files changed (1) hide show

app.py +221 -215

app.py CHANGED Viewed

@@ -1,9 +1,14 @@
 import gradio as gr
 import os
 from typing import List, Dict, Any, Optional, Tuple
 import hashlib
 from datetime import datetime
 import numpy as np
 # PDF 처리 라이브러리
 try:
@@ -20,7 +25,7 @@ except ImportError:
     ST_AVAILABLE = False
     print("⚠️ Sentence Transformers not installed. Install with: pip install sentence-transformers")
-# Soft and bright custom CSS
 custom_css = """
 .gradio-container {
     background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
@@ -37,7 +42,6 @@ custom_css = """
     margin: 12px;
 }
-/* Status messages styling */
 .pdf-status {
     padding: 12px 16px;
     border-radius: 12px;
@@ -72,6 +76,14 @@ custom_css = """
     border-radius: 8px;
     font-size: 0.9rem;
 }
 """
 class SimpleTextSplitter:
@@ -237,44 +249,128 @@ class PDFRAGSystem:
         all_relevant_chunks.sort(key=lambda x: x.get('similarity', 0), reverse=True)
         return all_relevant_chunks[:top_k]
-    def create_rag_prompt(self, query: str, doc_ids: List[str], top_k: int = 3) -> str:
-        """RAG 프롬프트 생성"""
         relevant_chunks = self.search_relevant_chunks(query, doc_ids, top_k)
         if not relevant_chunks:
-            return query
-        # 프롬프트 구성
-        prompt_parts = []
-        prompt_parts.append("다음 문서 내용을 참고하여 답변해주세요:\n")
-        prompt_parts.append("=" * 40)
         for i, chunk in enumerate(relevant_chunks, 1):
-            prompt_parts.append(f"\n[참고 {i} - {chunk['doc_name']}]")
             content = chunk['content'][:300] if len(chunk['content']) > 300 else chunk['content']
-            prompt_parts.append(content)
-        prompt_parts.append("\n" + "=" * 40)
-        prompt_parts.append(f"\n질문: {query}")
-        return "\n".join(prompt_parts)
-# RAG 시스템 인스턴스 생성
 rag_system = PDFRAGSystem()
-# State variable to track current model and RAG settings
-current_model = gr.State("openai/gpt-oss-120b")
-rag_enabled_state = gr.State(False)
-selected_docs_state = gr.State([])
-top_k_state = gr.State(3)
 def upload_pdf(file):
     """PDF 파일 업로드 처리"""
     if file is None:
         return (
             gr.update(value="<div class='pdf-status pdf-info'>📁 파일을 선택해주세요</div>"),
-            gr.update(choices=[]),
-            gr.update(value=False)
         )
     try:
@@ -302,257 +398,167 @@ def upload_pdf(file):
             return (
                 status_html,
-                gr.update(choices=doc_choices, value=doc_choices),
-                gr.update(value=True)
             )
         else:
             return (
                 f"<div class='pdf-status pdf-error'>❌ 오류: {result['error']}</div>",
-                gr.update(),
-                gr.update(value=False)
             )
     except Exception as e:
         return (
             f"<div class='pdf-status pdf-error'>❌ 오류: {str(e)}</div>",
-            gr.update(),
-            gr.update(value=False)
         )
 def clear_documents():
     """문서 초기화"""
     rag_system.documents = {}
     rag_system.document_chunks = {}
     rag_system.embeddings_store = {}
     return (
         gr.update(value="<div class='pdf-status pdf-info'>🗑️ 모든 문서가 삭제되었습니다</div>"),
-        gr.update(choices=[], value=[]),
-        gr.update(value=False)
     )
-def switch_model(model_choice):
-    """Function to switch between models"""
-    return gr.update(visible=False), gr.update(visible=True), model_choice
-def create_rag_wrapper(original_fn, model_name):
-    """원본 모델 함수를 RAG로 감싸는 래퍼 생성"""
-    def wrapped_fn(message, history=None):
-        # RAG 설정 가져오기
-        if rag_enabled_state.value and selected_docs_state.value:
-            doc_ids = [doc.split(":")[0] for doc in selected_docs_state.value]
-            enhanced_message = rag_system.create_rag_prompt(message, doc_ids, top_k_state.value)
-            # RAG 적용 알림
-            print(f"🔍 RAG 적용: {len(message)}자 → {len(enhanced_message)}자")
-            # 원본 모델에 강화된 메시지 전달
-            if history is not None:
-                return original_fn(enhanced_message, history)
-            else:
-                return original_fn(enhanced_message)
-        else:
-            # RAG 미적용시 원본 메시지 그대로 전달
-            if history is not None:
-                return original_fn(message, history)
-            else:
-                return original_fn(message)
-    return wrapped_fn
-# Main interface with soft theme
-with gr.Blocks(fill_height=True, theme=gr.themes.Soft(), css=custom_css) as demo:
     with gr.Row():
-        # Sidebar
         with gr.Column(scale=1):
             with gr.Group(elem_classes="main-container"):
-                gr.Markdown("# 🚀 Inference Provider + RAG")
-                gr.Markdown(
-                    "OpenAI GPT-OSS models served by Cerebras API. "
-                    "Upload PDF documents for context-aware responses."
                 )
-                # Model selection
-                model_dropdown = gr.Dropdown(
-                    choices=["openai/gpt-oss-120b", "openai/gpt-oss-20b"],
-                    value="openai/gpt-oss-120b",
-                    label="📊 Select Model",
-                    info="Choose between different model sizes"
                 )
-                # Login button
-                login_button = gr.LoginButton("Sign in with Hugging Face", size="lg")
-                # Reload button to apply model change
-                reload_btn = gr.Button("🔄 Apply Model Change", variant="primary", size="lg")
-                # RAG Settings
-                with gr.Accordion("📚 PDF RAG Settings", open=True):
-                    pdf_upload = gr.File(
-                        label="Upload PDF",
-                        file_types=[".pdf"],
-                        type="filepath"
-                    )
-                    upload_status = gr.HTML(
-                        value="<div class='pdf-status pdf-info'>📤 Upload a PDF to enable document-based answers</div>"
-                    )
-                    document_list = gr.CheckboxGroup(
-                        choices=[],
-                        label="📄 Uploaded Documents",
-                        info="Select documents to use as context"
-                    )
-                    clear_btn = gr.Button("🗑️ Clear All Documents", size="sm", variant="secondary")
-                    enable_rag = gr.Checkbox(
-                        label="✨ Enable RAG",
-                        value=False,
-                        info="Use documents for context-aware responses"
-                    )
-                    top_k_chunks = gr.Slider(
-                        minimum=1,
-                        maximum=5,
-                        value=3,
-                        step=1,
-                        label="Context Chunks",
-                        info="Number of document chunks to use"
-                    )
-                # Additional options
-                with gr.Accordion("⚙️ Advanced Options", open=False):
-                    gr.Markdown("*These options will be available after model implementation*")
-                    temperature = gr.Slider(
-                        minimum=0,
-                        maximum=2,
-                        value=0.7,
-                        step=0.1,
-                        label="Temperature"
-                    )
-                    max_tokens = gr.Slider(
-                        minimum=1,
-                        maximum=4096,
-                        value=512,
-                        step=1,
-                        label="Max Tokens"
-                    )
-        # Main chat area
-        with gr.Column(scale=3):
-            with gr.Group(elem_classes="main-container"):
-                gr.Markdown("## 💬 Chat Interface")
-                # RAG status
                 rag_status = gr.HTML(
                     value="<div class='pdf-status pdf-info'>🔍 RAG: <strong>Disabled</strong></div>"
                 )
-                # RAG context preview
                 context_preview = gr.HTML(value="", visible=False)
-                # Container for model interfaces
-                with gr.Column(visible=True) as model_120b_container:
-                    gr.Markdown("### Model: openai/gpt-oss-120b")
-                    # Load the original model and wrap it with RAG
-                    original_interface_120b = gr.load(
-                        "models/openai/gpt-oss-120b",
-                        accept_token=login_button,
-                        provider="fireworks-ai"
-                    )
-                    # Note: The loaded interface will have its own chat components
-                    # We'll intercept the messages through our wrapper function
-                with gr.Column(visible=False) as model_20b_container:
-                    gr.Markdown("### Model: openai/gpt-oss-20b")
-                    # Load the original model
-                    original_interface_20b = gr.load(
-                        "models/openai/gpt-oss-20b",
-                        accept_token=login_button,
-                        provider="fireworks-ai"
-                    )
-    # Event Handlers
-    # PDF upload
     pdf_upload.upload(
         fn=upload_pdf,
         inputs=[pdf_upload],
-        outputs=[upload_status, document_list, enable_rag]
     )
-    # Clear documents
     clear_btn.click(
         fn=clear_documents,
-        outputs=[upload_status, document_list, enable_rag]
     )
-    # Update RAG state when settings change
-    def update_rag_state(enabled, docs, k):
-        rag_enabled_state.value = enabled
-        selected_docs_state.value = docs if docs else []
-        top_k_state.value = k
-        status = "✅ Enabled" if enabled and docs else "⭕ Disabled"
-        status_html = f"<div class='pdf-status pdf-info'>🔍 RAG: <strong>{status}</strong></div>"
-        # Show context preview if RAG is enabled
-        if enabled and docs:
-            preview = f"<div class='rag-context'>📚 Using {len(docs)} document(s) with {k} chunks per query</div>"
-            return gr.update(value=status_html), gr.update(value=preview, visible=True)
-        else:
-            return gr.update(value=status_html), gr.update(value="", visible=False)
-    # Connect RAG state updates
     enable_rag.change(
-        fn=update_rag_state,
-        inputs=[enable_rag, document_list, top_k_chunks],
         outputs=[rag_status, context_preview]
     )
     document_list.change(
-        fn=update_rag_state,
-        inputs=[enable_rag, document_list, top_k_chunks],
         outputs=[rag_status, context_preview]
     )
-    top_k_chunks.change(
-        fn=update_rag_state,
-        inputs=[enable_rag, document_list, top_k_chunks],
         outputs=[rag_status, context_preview]
     )
-    # Handle model switching
-    reload_btn.click(
-        fn=switch_model,
-        inputs=[model_dropdown],
-        outputs=[model_120b_container, model_20b_container, current_model]
-    ).then(
-        fn=lambda: gr.Info("Model switched successfully!"),
-        inputs=[],
-        outputs=[]
-    )
-    # Update visibility based on dropdown selection
-    def update_visibility(model_choice):
-        if model_choice == "openai/gpt-oss-120b":
-            return gr.update(visible=True), gr.update(visible=False)
-        else:
-            return gr.update(visible=False), gr.update(visible=True)
-    model_dropdown.change(
-        fn=update_visibility,
-        inputs=[model_dropdown],
-        outputs=[model_120b_container, model_20b_container]
-    )
-    # Monkey-patch the loaded interfaces to add RAG support
-    # This is done after the interface is loaded
-    demo.load = lambda: print("📚 RAG System Ready!")
-demo.launch()

 import gradio as gr
+import spaces
 import os
 from typing import List, Dict, Any, Optional, Tuple
 import hashlib
 from datetime import datetime
 import numpy as np
+from transformers import pipeline, TextIteratorStreamer
+import torch
+from threading import Thread
+import re
 # PDF 처리 라이브러리
 try:
     ST_AVAILABLE = False
     print("⚠️ Sentence Transformers not installed. Install with: pip install sentence-transformers")
+# Custom CSS
 custom_css = """
 .gradio-container {
     background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
     margin: 12px;
 }
 .pdf-status {
     padding: 12px 16px;
     border-radius: 12px;
     border-radius: 8px;
     font-size: 0.9rem;
 }
+.thinking-section {
+    background: rgba(0, 0, 0, 0.02);
+    border: 1px solid rgba(0, 0, 0, 0.1);
+    border-radius: 8px;
+    padding: 12px;
+    margin: 8px 0;
+}
 """
 class SimpleTextSplitter:
         all_relevant_chunks.sort(key=lambda x: x.get('similarity', 0), reverse=True)
         return all_relevant_chunks[:top_k]
+    def create_rag_prompt(self, query: str, doc_ids: List[str], top_k: int = 3) -> tuple:
+        """Build the RAG prompt; returns (enhanced_query, context), or (query, "") when no relevant chunk is found."""
         relevant_chunks = self.search_relevant_chunks(query, doc_ids, top_k)
         if not relevant_chunks:
+            return query, ""
+        # Assemble the context block (each chunk is capped at 300 characters below)
+        context_parts = []
+        context_parts.append("다음 문서 내용을 참고하여 답변해주세요:\n")
+        context_parts.append("=" * 40)
         for i, chunk in enumerate(relevant_chunks, 1):
+            context_parts.append(f"\n[참고 {i} - {chunk['doc_name']}]")
             content = chunk['content'][:300] if len(chunk['content']) > 300 else chunk['content']
+            context_parts.append(content)
+        context_parts.append("\n" + "=" * 40)
+        context = "\n".join(context_parts)
+        enhanced_query = f"{context}\n\n질문: {query}"
+        return enhanced_query, context
+# Initialize the text-generation pipeline (built once, at import time) and the RAG system
+model_id = "openai/gpt-oss-20b"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype="auto",
+    device_map="auto",
+)
 rag_system = PDFRAGSystem()
+# Global RAG settings: written by update_rag_settings/clear_documents, read by generate_response. NOTE(review): module-level, so presumably shared by every user session of this process - consider per-session gr.State.
+rag_enabled = False
+selected_docs = []
+top_k_chunks = 3
+last_context = ""
+def format_conversation_history(chat_history):
+    """Format conversation history for the model"""
+    messages = []
+    for item in chat_history:
+        role = item["role"]
+        content = item["content"]
+        if isinstance(content, list):
+            content = content[0]["text"] if content and "text" in content[0] else str(content)
+        messages.append({"role": role, "content": content})
+    return messages
+@spaces.GPU()
+def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
+    """Generate response with optional RAG enhancement"""
+    global last_context
+    # Apply RAG if enabled
+    if rag_enabled and selected_docs:
+        doc_ids = [doc.split(":")[0] for doc in selected_docs]
+        enhanced_input, context = rag_system.create_rag_prompt(input_data, doc_ids, top_k_chunks)
+        last_context = context
+        actual_input = enhanced_input
+    else:
+        actual_input = input_data
+        last_context = ""
+    # Prepare messages
+    new_message = {"role": "user", "content": actual_input}
+    system_message = [{"role": "system", "content": system_prompt}] if system_prompt else []
+    processed_history = format_conversation_history(chat_history)
+    messages = system_message + processed_history + [new_message]
+    # Setup streaming
+    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = {
+        "max_new_tokens": max_new_tokens,
+        "do_sample": True,
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "repetition_penalty": repetition_penalty,
+        "streamer": streamer
+    }
+    thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
+    thread.start()
+    # Process streaming output
+    thinking = ""
+    final = ""
+    started_final = False
+    for chunk in streamer:
+        if not started_final:
+            if "assistantfinal" in chunk.lower():
+                split_parts = re.split(r'assistantfinal', chunk, maxsplit=1)
+                thinking += split_parts[0]
+                final += split_parts[1]
+                started_final = True
+            else:
+                thinking += chunk
+        else:
+            final += chunk
+        clean_thinking = re.sub(r'^analysis\s*', '', thinking).strip()
+        clean_final = final.strip()
+        # Add RAG context indicator if used
+        rag_indicator = ""
+        if rag_enabled and selected_docs and last_context:
+            rag_indicator = "<div class='rag-context'>📚 RAG Context Applied</div>\n\n"
+        formatted = f"{rag_indicator}<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
+        yield formatted
 def upload_pdf(file):
     """PDF 파일 업로드 처리"""
     if file is None:
         return (
             gr.update(value="<div class='pdf-status pdf-info'>📁 파일을 선택해주세요</div>"),
+            gr.update(choices=[])
         )
     try:
             return (
                 status_html,
+                gr.update(choices=doc_choices, value=doc_choices)
             )
         else:
             return (
                 f"<div class='pdf-status pdf-error'>❌ 오류: {result['error']}</div>",
+                gr.update()
             )
     except Exception as e:
         return (
             f"<div class='pdf-status pdf-error'>❌ 오류: {str(e)}</div>",
+            gr.update()
         )
 def clear_documents():
     """Remove every stored document, chunk and embedding, and empty the selected-document list."""
+    global selected_docs
     rag_system.documents = {}
     rag_system.document_chunks = {}
     rag_system.embeddings_store = {}
+    selected_docs = []
     return (
         gr.update(value="<div class='pdf-status pdf-info'>🗑️ 모든 문서가 삭제되었습니다</div>"),
+        gr.update(choices=[], value=[])
     )
+def update_rag_settings(enable, docs, k):
+    """Update RAG settings"""
+    global rag_enabled, selected_docs, top_k_chunks
+    rag_enabled = enable
+    selected_docs = docs if docs else []
+    top_k_chunks = k
+    status = "✅ Enabled" if enable and docs else "⭕ Disabled"
+    status_html = f"<div class='pdf-status pdf-info'>🔍 RAG: <strong>{status}</strong></div>"
+    # Show context preview if RAG is enabled
+    if enable and docs:
+        preview = f"<div class='rag-context'>📚 Using {len(docs)} document(s) with {k} chunks per query</div>"
+        return gr.update(value=status_html), gr.update(value=preview, visible=True)
+    else:
+        return gr.update(value=status_html), gr.update(value="", visible=False)
+# Build the interface
+with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo:
+    gr.Markdown("# 🚀 GPT-OSS-20B with PDF RAG System")
+    gr.Markdown("Enhanced AI assistant with document-based context understanding")
     with gr.Row():
+        # Left sidebar for RAG controls
         with gr.Column(scale=1):
             with gr.Group(elem_classes="main-container"):
+                gr.Markdown("### 📚 Document RAG Settings")
+                pdf_upload = gr.File(
+                    label="Upload PDF",
+                    file_types=[".pdf"],
+                    type="filepath"
                 )
+                upload_status = gr.HTML(
+                    value="<div class='pdf-status pdf-info'>📤 Upload a PDF to enable document-based answers</div>"
                 )
+                document_list = gr.CheckboxGroup(
+                    choices=[],
+                    label="📄 Select Documents",
+                    info="Choose documents to use as context"
+                )
+                clear_btn = gr.Button("🗑️ Clear All Documents", size="sm", variant="secondary")
+                enable_rag = gr.Checkbox(
+                    label="✨ Enable RAG",
+                    value=False,
+                    info="Use documents for context-aware responses"
+                )
+                top_k_slider = gr.Slider(
+                    minimum=1,
+                    maximum=5,
+                    value=3,
+                    step=1,
+                    label="Context Chunks",
+                    info="Number of document chunks to use"
+                )
+                # RAG status display
                 rag_status = gr.HTML(
                     value="<div class='pdf-status pdf-info'>🔍 RAG: <strong>Disabled</strong></div>"
                 )
                 context_preview = gr.HTML(value="", visible=False)
+        # Right side for chat interface
+        with gr.Column(scale=3):
+            with gr.Group(elem_classes="main-container"):
+                # Create ChatInterface with custom function
+                chat_interface = gr.ChatInterface(
+                    fn=generate_response,
+                    additional_inputs=[
+                        gr.Slider(label="Max new tokens", minimum=64, maximum=4096, step=1, value=2048),
+                        gr.Textbox(
+                            label="System Prompt",
+                            value="You are a helpful assistant. Reasoning: medium",
+                            lines=4,
+                            placeholder="Change system prompt"
+                        ),
+                        gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7),
+                        gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
+                        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=50),
+                        gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
+                    ],
+                    examples=[
+                        [{"text": "Explain Newton laws clearly and concisely"}],
+                        [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
+                        [{"text": "What are the benefits of open weight AI models"}],
+                    ],
+                    cache_examples=False,
+                    type="messages",
+                    description="""Chat with GPT-OSS-20B. Upload PDFs to enhance responses with document context.
+                    Click to view thinking process (default is on).""",
+                    textbox=gr.Textbox(
+                        label="Query Input",
+                        placeholder="Type your prompt (RAG will be applied if enabled)"
+                    ),
+                    stop_btn="Stop Generation",
+                    multimodal=False
+                )
+    # Event handlers
     pdf_upload.upload(
         fn=upload_pdf,
         inputs=[pdf_upload],
+        outputs=[upload_status, document_list]
     )
     clear_btn.click(
         fn=clear_documents,
+        outputs=[upload_status, document_list]
     )
+    # Update RAG settings when changed
     enable_rag.change(
+        fn=update_rag_settings,
+        inputs=[enable_rag, document_list, top_k_slider],
         outputs=[rag_status, context_preview]
     )
     document_list.change(
+        fn=update_rag_settings,
+        inputs=[enable_rag, document_list, top_k_slider],
         outputs=[rag_status, context_preview]
     )
+    top_k_slider.change(
+        fn=update_rag_settings,
+        inputs=[enable_rag, document_list, top_k_slider],
         outputs=[rag_status, context_preview]
     )
+if __name__ == "__main__":
+    demo.launch(share=True)  # NOTE(review): share=True requests a public share link - confirm this is intended when run outside Spaces