Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -411,39 +411,49 @@ def search_api(query, top_k=5, search_type="hybrid", vector_weight=0.6, bm25_wei
|
|
411 |
|
412 |
def get_stats_api():
|
413 |
"""API endpoint for knowledge base statistics"""
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
"
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
"
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
|
448 |
def chat_interface(message, history):
|
449 |
"""Chat interface with hybrid search"""
|
@@ -711,6 +721,34 @@ demo = gr.TabbedInterface(
|
|
711 |
title="π₯ Hybrid Search RAGtim Bot - Vector + BM25 Fusion"
|
712 |
)
|
713 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
714 |
if __name__ == "__main__":
|
715 |
print("π Launching Hybrid Search RAGtim Bot...")
|
716 |
print(f"π Loaded {len(bot.knowledge_base)} sections from markdown files")
|
@@ -718,6 +756,60 @@ if __name__ == "__main__":
|
|
718 |
print(f"π§ Vector embeddings: {len(bot.embeddings)} documents")
|
719 |
print("π₯ Hybrid search ready: Semantic + Keyword fusion!")
|
720 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
721 |
demo.launch(
|
722 |
server_name="0.0.0.0",
|
723 |
server_port=7860,
|
|
|
411 |
|
412 |
def get_stats_api():
    """API endpoint for knowledge base statistics"""
    try:
        # Tally how many knowledge-base sections fall under each document
        # type and each source markdown file.
        type_counts = {}
        file_counts = {}

        for entry in bot.knowledge_base:
            entry_type = entry["metadata"]["type"]
            entry_source = entry["metadata"]["source"]

            type_counts[entry_type] = type_counts.get(entry_type, 0) + 1
            file_counts[entry_source] = file_counts.get(entry_source, 0) + 1

        # Successful response: counts plus static model/search metadata.
        return {
            "total_documents": len(bot.knowledge_base),
            "document_types": type_counts,
            "sections_by_file": file_counts,
            "model_name": "sentence-transformers/all-MiniLM-L6-v2",
            "embedding_dimension": 384,
            "search_capabilities": [
                "Hybrid Search (Vector + BM25)",
                "Semantic Vector Search",
                "BM25 Keyword Search",
                "GPU Accelerated",
                "Transformer Embeddings"
            ],
            "bm25_parameters": {
                "k1": bot.k1,
                "b": bot.b,
                "unique_terms": len(bot.document_frequency),
                "average_doc_length": bot.average_doc_length
            },
            "backend_type": "Hugging Face Space with Hybrid Search",
            "knowledge_sources": list(file_counts.keys()),
            "status": "healthy"
        }
    except Exception as e:
        # Never raise out of the stats endpoint: report the failure in-band
        # so API consumers always receive a well-formed payload.
        print(f"Error in get_stats_api: {e}")
        return {
            "error": str(e),
            "status": "error",
            "total_documents": 0,
            "search_capabilities": ["Error"]
        }
|
457 |
|
458 |
def chat_interface(message, history):
|
459 |
"""Chat interface with hybrid search"""
|
|
|
721 |
title="π₯ Hybrid Search RAGtim Bot - Vector + BM25 Fusion"
|
722 |
)
|
723 |
|
724 |
+
# Add API routes for external access
def api_search(request: gr.Request):
    """Handle API search requests"""
    try:
        # Pull search parameters out of the query string, falling back to
        # the same defaults used by the Gradio UI.
        params = request.query_params
        query = params.get('query', '')
        top_k = int(params.get('top_k', 5))
        search_type = params.get('search_type', 'hybrid')
        vector_weight = float(params.get('vector_weight', 0.6))
        bm25_weight = float(params.get('bm25_weight', 0.4))

        if not query:
            return {"error": "Query parameter is required"}

        return search_api(query, top_k, search_type, vector_weight, bm25_weight)
    except Exception as e:
        return {"error": str(e)}

def api_stats(request: gr.Request):
    """Handle API stats requests"""
    try:
        return get_stats_api()
    except Exception as e:
        return {"error": str(e)}

# Mount API endpoints
# NOTE(review): `mount_gradio_app` is a module-level gradio function, not a
# Blocks method — this assignment only attaches an attribute to `demo` and
# nothing visible in this file ever reads it, so it likely has no effect on
# mounting. Also, api_search/api_stats above are not registered as routes
# anywhere visible here — confirm they are still needed.
demo.mount_gradio_app = lambda: None  # Disable default mounting
|
751 |
+
|
752 |
if __name__ == "__main__":
|
753 |
print("π Launching Hybrid Search RAGtim Bot...")
|
754 |
print(f"π Loaded {len(bot.knowledge_base)} sections from markdown files")
|
|
|
756 |
print(f"π§ Vector embeddings: {len(bot.embeddings)} documents")
|
757 |
print("π₯ Hybrid search ready: Semantic + Keyword fusion!")
|
758 |
|
759 |
+
# Create a custom app with API routes
|
760 |
+
import uvicorn
|
761 |
+
from fastapi import FastAPI, Request
|
762 |
+
from fastapi.responses import JSONResponse
|
763 |
+
|
764 |
+
app = FastAPI()
|
765 |
+
|
766 |
+
@app.get("/api/search")
|
767 |
+
async def search_endpoint(request: Request):
|
768 |
+
try:
|
769 |
+
query = request.query_params.get('query', '')
|
770 |
+
top_k = int(request.query_params.get('top_k', 5))
|
771 |
+
search_type = request.query_params.get('search_type', 'hybrid')
|
772 |
+
vector_weight = float(request.query_params.get('vector_weight', 0.6))
|
773 |
+
bm25_weight = float(request.query_params.get('bm25_weight', 0.4))
|
774 |
+
|
775 |
+
if not query:
|
776 |
+
return JSONResponse({"error": "Query parameter is required"}, status_code=400)
|
777 |
+
|
778 |
+
result = search_api(query, top_k, search_type, vector_weight, bm25_weight)
|
779 |
+
return JSONResponse(result)
|
780 |
+
except Exception as e:
|
781 |
+
return JSONResponse({"error": str(e)}, status_code=500)
|
782 |
+
|
783 |
+
@app.post("/api/search")
|
784 |
+
async def search_endpoint_post(request: Request):
|
785 |
+
try:
|
786 |
+
body = await request.json()
|
787 |
+
query = body.get('query', '')
|
788 |
+
top_k = body.get('top_k', 5)
|
789 |
+
search_type = body.get('search_type', 'hybrid')
|
790 |
+
vector_weight = body.get('vector_weight', 0.6)
|
791 |
+
bm25_weight = body.get('bm25_weight', 0.4)
|
792 |
+
|
793 |
+
if not query:
|
794 |
+
return JSONResponse({"error": "Query is required"}, status_code=400)
|
795 |
+
|
796 |
+
result = search_api(query, top_k, search_type, vector_weight, bm25_weight)
|
797 |
+
return JSONResponse(result)
|
798 |
+
except Exception as e:
|
799 |
+
return JSONResponse({"error": str(e)}, status_code=500)
|
800 |
+
|
801 |
+
@app.get("/api/stats")
|
802 |
+
async def stats_endpoint():
|
803 |
+
try:
|
804 |
+
result = get_stats_api()
|
805 |
+
return JSONResponse(result)
|
806 |
+
except Exception as e:
|
807 |
+
return JSONResponse({"error": str(e)}, status_code=500)
|
808 |
+
|
809 |
+
# Mount Gradio app
|
810 |
+
app = gr.mount_gradio_app(app, demo, path="/")
|
811 |
+
|
812 |
+
# For Hugging Face Spaces, just launch the demo
|
813 |
demo.launch(
|
814 |
server_name="0.0.0.0",
|
815 |
server_port=7860,
|