"""
RAG Knowledge Assistant - Hugging Face Spaces Demo
Production-ready Retrieval-Augmented Generation system
"""
import gradio as gr
import os
import numpy as np
from pathlib import Path
from typing import List, Tuple, Dict
import time
# Mock classes for Hugging Face demo
class MockDocumentProcessor:
def __init__(self, chunk_size=400, overlap=50):
self.chunk_size = chunk_size
self.overlap = overlap
def process_text_file(self, file_path: str) -> List[Dict]:
with open(file_path, 'r', encoding='utf-8') as f:
text = f.read()
chunks = []
for i in range(0, len(text), self.chunk_size):
chunk_text = text[i:i + self.chunk_size]
if chunk_text.strip():
chunks.append({
'text': chunk_text.strip(),
'chunk_id': f"chunk_{len(chunks)}",
'source': file_path,
'char_count': len(chunk_text)
})
return chunks
class MockRAGDemo:
    """Mock RAG pipeline backing the Gradio demo (no OpenAI calls)."""

    def __init__(self):
        # One shared processor; chunks accumulate across all uploads.
        self.document_processor = MockDocumentProcessor()
        self.chunks = []          # every chunk from every processed file
        self.processed_docs = []  # upload log: name / chunk count / time
        print("π RAG Demo initialized")

    def process_file(self, file):
        """Chunk an uploaded text file and record it.

        Args:
            file: Gradio file object (or ``None`` when nothing was
                uploaded); only its ``.name`` path attribute is used.

        Returns:
            A human-readable status string for the UI; errors are
            reported in the string rather than raised.
        """
        if file is None:
            return "β No file uploaded"
        try:
            chunks = self.document_processor.process_text_file(file.name)
            self.chunks.extend(chunks)
            file_name = Path(file.name).name
            self.processed_docs.append({
                'name': file_name,
                'chunks': len(chunks),
                'timestamp': time.strftime("%H:%M:%S"),
            })
            # BUG FIX: this f-string was split by a stray line break in
            # the source (unterminated literal / SyntaxError);
            # reconstructed as a single literal.
            return f"β Processed {file_name}!\nπ Created {len(chunks)} chunks\nπ Total: {len(self.chunks)} chunks"
        except Exception as e:
            # Broad catch is deliberate for the demo: any I/O or decoding
            # error is surfaced to the user instead of crashing the app.
            return f"β Error: {str(e)}"

    def chat(self, message: str, history: List[Tuple[str, str]]):
        """Append a mock answer for *message* to *history*.

        Returns:
            ``("", history)`` — the empty string clears the input
            textbox; *history* is mutated in place and returned.
        """
        if not message.strip():
            return "", history
        if not self.chunks:
            response = "β οΈ Upload a document first!"
            history.append((message, response))
            return "", history
        # Mock retrieval: the real system would rank chunks by cosine
        # similarity; here we just take the first three.
        relevant_chunks = self.chunks[:3]
        context = "\n".join([chunk['text'][:200] + "..." for chunk in relevant_chunks])
        response = f"""π€ **Demo Response** (Mock AI for Hugging Face)
Based on your uploaded documents, here's what I found:
**Context:** {context}
**Mock Analysis:** This is a demonstration of the RAG system architecture. In the full version with OpenAI API:
- Real similarity search finds most relevant chunks
- GPT-4 generates contextual responses
- Source attribution with confidence scores
π **Sources:** {', '.join([Path(c['source']).name for c in relevant_chunks])}
π **Full Version:** [GitHub Repository](https://github.com/drbinna/rag-knowledge-assistant)"""
        history.append((message, response))
        return "", history
# Create demo
# Single module-level instance shared by all Gradio event handlers below.
rag_demo = MockRAGDemo()

# UI layout: a banner plus three tabs (Chat, Upload, About) wired to the
# methods of `rag_demo`. Indentation reconstructed; tokens unchanged.
with gr.Blocks(title="RAG Knowledge Assistant", theme=gr.themes.Soft()) as demo:
    # Static gradient banner (raw HTML, no interactivity).
    gr.HTML("""
<div style="text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
<h1>π€ RAG Knowledge Assistant</h1>
<p>Production-ready Retrieval-Augmented Generation system</p>
<p><em>Real Similarity Search β’ Smart Document Processing β’ AI Integration</em></p>
</div>
""")
    with gr.Tabs():
        # Chat tab: conversation view plus a question box and send button.
        with gr.TabItem("π¬ Chat"):
            chatbot = gr.Chatbot(label="Conversation", height=400)
            with gr.Row():
                msg = gr.Textbox(label="Your Question", placeholder="Ask about your documents...", scale=4)
                send_btn = gr.Button("Send", variant="primary", scale=1)
        # Upload tab: .txt file picker, process trigger, and status readout.
        with gr.TabItem("π Upload"):
            gr.Markdown("### Upload Text Documents")
            file_upload = gr.File(label="Choose TXT file", file_types=[".txt"])
            upload_btn = gr.Button("Process Document", variant="primary")
            upload_status = gr.Textbox(label="Status", lines=5, interactive=False)
        # About tab: static project description (markdown only).
        with gr.TabItem("βΉοΈ About"):
            gr.Markdown("""
## RAG Knowledge Assistant
**Production-ready Retrieval-Augmented Generation system**
### π§ Features
- Real cosine similarity search with NumPy
- Smart document chunking (400 chars + overlap)
- OpenAI GPT-4 integration
- Professional error handling
### π Full Version
**[GitHub Repository](https://github.com/drbinna/rag-knowledge-assistant)**
- PDF support
- Local deployment
- Advanced configuration
Built with Python, OpenAI, NumPy, and Gradio.
""")
    # Event handlers
    # chat() returns ("", updated_history), so the textbox clears on send;
    # both Enter (submit) and the Send button trigger the same handler.
    msg.submit(rag_demo.chat, [msg, chatbot], [msg, chatbot])
    send_btn.click(rag_demo.chat, [msg, chatbot], [msg, chatbot])
    upload_btn.click(rag_demo.process_file, file_upload, upload_status)

if __name__ == "__main__":
    demo.launch()