"""
RAG Knowledge Assistant - Hugging Face Spaces Demo
Production-ready Retrieval-Augmented Generation system
"""
import gradio as gr
import os
import numpy as np
from pathlib import Path
from typing import List, Tuple, Dict
import time
# Mock classes for Hugging Face demo
class MockDocumentProcessor:
def __init__(self, chunk_size=400, overlap=50):
self.chunk_size = chunk_size
self.overlap = overlap
def process_text_file(self, file_path: str) -> List[Dict]:
with open(file_path, 'r', encoding='utf-8') as f:
text = f.read()
chunks = []
for i in range(0, len(text), self.chunk_size):
chunk_text = text[i:i + self.chunk_size]
if chunk_text.strip():
chunks.append({
'text': chunk_text.strip(),
'chunk_id': f"chunk_{len(chunks)}",
'source': file_path,
'char_count': len(chunk_text)
})
return chunks
class MockRAGDemo:
    """Mock RAG pipeline backing the Gradio demo (no OpenAI calls)."""

    def __init__(self):
        # One shared processor; chunks accumulate across all uploads.
        self.document_processor = MockDocumentProcessor()
        self.chunks = []          # every chunk from every processed file
        self.processed_docs = []  # upload log: name / chunk count / time
        print("π RAG Demo initialized")

    def process_file(self, file):
        """Chunk an uploaded text file and record it.

        Args:
            file: Gradio file object (or ``None`` when nothing was
                uploaded); only its ``.name`` path attribute is used.

        Returns:
            A human-readable status string for the UI; errors are
            reported in the string rather than raised.
        """
        if file is None:
            return "β No file uploaded"
        try:
            chunks = self.document_processor.process_text_file(file.name)
            self.chunks.extend(chunks)
            file_name = Path(file.name).name
            self.processed_docs.append({
                'name': file_name,
                'chunks': len(chunks),
                'timestamp': time.strftime("%H:%M:%S"),
            })
            # BUG FIX: this f-string was split by a stray line break in
            # the source (unterminated literal / SyntaxError);
            # reconstructed as a single literal.
            return f"β Processed {file_name}!\nπ Created {len(chunks)} chunks\nπ Total: {len(self.chunks)} chunks"
        except Exception as e:
            # Broad catch is deliberate for the demo: any I/O or decoding
            # error is surfaced to the user instead of crashing the app.
            return f"β Error: {str(e)}"

    def chat(self, message: str, history: List[Tuple[str, str]]):
        """Append a mock answer for *message* to *history*.

        Returns:
            ``("", history)`` — the empty string clears the input
            textbox; *history* is mutated in place and returned.
        """
        if not message.strip():
            return "", history
        if not self.chunks:
            response = "β οΈ Upload a document first!"
            history.append((message, response))
            return "", history
        # Mock retrieval: the real system would rank chunks by cosine
        # similarity; here we just take the first three.
        relevant_chunks = self.chunks[:3]
        context = "\n".join([chunk['text'][:200] + "..." for chunk in relevant_chunks])
        response = f"""π€ **Demo Response** (Mock AI for Hugging Face)
Based on your uploaded documents, here's what I found:
**Context:** {context}
**Mock Analysis:** This is a demonstration of the RAG system architecture. In the full version with OpenAI API:
- Real similarity search finds most relevant chunks
- GPT-4 generates contextual responses
- Source attribution with confidence scores
π **Sources:** {', '.join([Path(c['source']).name for c in relevant_chunks])}
π **Full Version:** [GitHub Repository](https://github.com/drbinna/rag-knowledge-assistant)"""
        history.append((message, response))
        return "", history
# Create demo
# Single module-level instance shared by all Gradio event handlers below.
rag_demo = MockRAGDemo()

# UI layout: a banner plus three tabs (Chat, Upload, About) wired to the
# methods of `rag_demo`. Indentation reconstructed; tokens unchanged.
with gr.Blocks(title="RAG Knowledge Assistant", theme=gr.themes.Soft()) as demo:
    # Static gradient banner (raw HTML, no interactivity).
    gr.HTML("""
<div style="text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
<h1>π€ RAG Knowledge Assistant</h1>
<p>Production-ready Retrieval-Augmented Generation system</p>
<p><em>Real Similarity Search β’ Smart Document Processing β’ AI Integration</em></p>
</div>
""")
    with gr.Tabs():
        # Chat tab: conversation view plus a question box and send button.
        with gr.TabItem("π¬ Chat"):
            chatbot = gr.Chatbot(label="Conversation", height=400)
            with gr.Row():
                msg = gr.Textbox(label="Your Question", placeholder="Ask about your documents...", scale=4)
                send_btn = gr.Button("Send", variant="primary", scale=1)
        # Upload tab: .txt file picker, process trigger, and status readout.
        with gr.TabItem("π Upload"):
            gr.Markdown("### Upload Text Documents")
            file_upload = gr.File(label="Choose TXT file", file_types=[".txt"])
            upload_btn = gr.Button("Process Document", variant="primary")
            upload_status = gr.Textbox(label="Status", lines=5, interactive=False)
        # About tab: static project description (markdown only).
        with gr.TabItem("βΉοΈ About"):
            gr.Markdown("""
## RAG Knowledge Assistant
**Production-ready Retrieval-Augmented Generation system**
### π§ Features
- Real cosine similarity search with NumPy
- Smart document chunking (400 chars + overlap)
- OpenAI GPT-4 integration
- Professional error handling
### π Full Version
**[GitHub Repository](https://github.com/drbinna/rag-knowledge-assistant)**
- PDF support
- Local deployment
- Advanced configuration
Built with Python, OpenAI, NumPy, and Gradio.
""")
    # Event handlers
    # chat() returns ("", updated_history), so the textbox clears on send;
    # both Enter (submit) and the Send button trigger the same handler.
    msg.submit(rag_demo.chat, [msg, chatbot], [msg, chatbot])
    send_btn.click(rag_demo.chat, [msg, chatbot], [msg, chatbot])
    upload_btn.click(rag_demo.process_file, file_upload, upload_status)

if __name__ == "__main__":
    demo.launch()