Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -419,8 +419,8 @@ gemini_model = None
|
|
419 |
current_model = None # Track which model is currently loaded
|
420 |
|
421 |
|
422 |
-
def chunk_document(text, chunk_size=
|
423 |
-
"""Split document into overlapping chunks for RAG"""
|
424 |
words = text.split()
|
425 |
chunks = []
|
426 |
|
@@ -451,10 +451,10 @@ def create_embeddings(chunks):
|
|
451 |
print(f"Error creating embeddings: {e}")
|
452 |
return None
|
453 |
|
454 |
-
def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
|
455 |
"""Retrieve most relevant chunks for a question"""
|
456 |
if embedding_model is None or embeddings is None:
|
457 |
-
return chunks[:
|
458 |
|
459 |
try:
|
460 |
question_embedding = embedding_model.encode([question], show_progress_bar=False)
|
@@ -467,7 +467,7 @@ def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
|
|
467 |
return relevant_chunks
|
468 |
except Exception as e:
|
469 |
print(f"Error retrieving chunks: {e}")
|
470 |
-
return chunks[:
|
471 |
|
472 |
def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
|
473 |
"""Main processing function for uploaded PDF"""
|
@@ -708,16 +708,18 @@ with gr.Blocks(
|
|
708 |
if model is None:
|
709 |
return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
|
710 |
|
711 |
-
# Use RAG to get relevant chunks from markdown (
|
712 |
if document_chunks and len(document_chunks) > 0:
|
713 |
-
relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=
|
714 |
context = "\n\n".join(relevant_chunks)
|
715 |
-
#
|
716 |
-
if len(context) >
|
717 |
-
|
|
|
|
|
718 |
else:
|
719 |
# Fallback to truncated document if RAG fails
|
720 |
-
context = processed_markdown[:
|
721 |
|
722 |
# Create prompt for Gemini
|
723 |
prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
|
|
|
419 |
current_model = None # Track which model is currently loaded
|
420 |
|
421 |
|
422 |
+
def chunk_document(text, chunk_size=1024, overlap=100):
|
423 |
+
"""Split document into overlapping chunks for RAG - optimized for API quota"""
|
424 |
words = text.split()
|
425 |
chunks = []
|
426 |
|
|
|
451 |
print(f"Error creating embeddings: {e}")
|
452 |
return None
|
453 |
|
454 |
+
def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
|
455 |
"""Retrieve most relevant chunks for a question"""
|
456 |
if embedding_model is None or embeddings is None:
|
457 |
+
return chunks[:3] # Fallback to first 3 chunks
|
458 |
|
459 |
try:
|
460 |
question_embedding = embedding_model.encode([question], show_progress_bar=False)
|
|
|
467 |
return relevant_chunks
|
468 |
except Exception as e:
|
469 |
print(f"Error retrieving chunks: {e}")
|
470 |
+
return chunks[:3] # Fallback
|
471 |
|
472 |
def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
|
473 |
"""Main processing function for uploaded PDF"""
|
|
|
708 |
if model is None:
|
709 |
return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
|
710 |
|
711 |
+
# Use RAG to get relevant chunks from markdown (balanced for performance vs quota)
|
712 |
if document_chunks and len(document_chunks) > 0:
|
713 |
+
relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
|
714 |
context = "\n\n".join(relevant_chunks)
|
715 |
+
# Smart truncation: aim for ~4000 chars (good context while staying under quota)
|
716 |
+
if len(context) > 4000:
|
717 |
+
# Try to cut at sentence boundaries
|
718 |
+
sentences = context[:4000].split('.')
|
719 |
+
context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:4000] + '...'
|
720 |
else:
|
721 |
# Fallback to truncated document if RAG fails
|
722 |
+
context = processed_markdown[:4000] + "..." if len(processed_markdown) > 4000 else processed_markdown
|
723 |
|
724 |
# Create prompt for Gemini
|
725 |
prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
|