raksama19 committed
Commit 1e2434f (verified) · Parent: 84e44dc

Update app.py

Files changed (1): app.py  +13 -11
app.py CHANGED
@@ -419,8 +419,8 @@ gemini_model = None
 current_model = None  # Track which model is currently loaded
 
 
-def chunk_document(text, chunk_size=400, overlap=40):
-    """Split document into overlapping chunks for RAG"""
+def chunk_document(text, chunk_size=1024, overlap=100):
+    """Split document into overlapping chunks for RAG - optimized for API quota"""
     words = text.split()
     chunks = []
 
@@ -451,10 +451,10 @@ def create_embeddings(chunks):
         print(f"Error creating embeddings: {e}")
         return None
 
-def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
+def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
     """Retrieve most relevant chunks for a question"""
     if embedding_model is None or embeddings is None:
-        return chunks[:2]  # Fallback to first 2 chunks
+        return chunks[:3]  # Fallback to first 3 chunks
 
     try:
         question_embedding = embedding_model.encode([question], show_progress_bar=False)
@@ -467,7 +467,7 @@ def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
         return relevant_chunks
     except Exception as e:
         print(f"Error retrieving chunks: {e}")
-        return chunks[:2]  # Fallback
+        return chunks[:3]  # Fallback
 
 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
@@ -708,16 +708,18 @@ with gr.Blocks(
         if model is None:
             return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
 
-        # Use RAG to get relevant chunks from markdown (smaller chunks for quota)
+        # Use RAG to get relevant chunks from markdown (balanced for performance vs quota)
         if document_chunks and len(document_chunks) > 0:
-            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=2)
+            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
             context = "\n\n".join(relevant_chunks)
-            # Limit context size to avoid quota issues
-            if len(context) > 1000:
-                context = context[:1000] + "..."
+            # Smart truncation: aim for ~4000 chars (good context while staying under quota)
+            if len(context) > 4000:
+                # Try to cut at sentence boundaries
+                sentences = context[:4000].split('.')
+                context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:4000] + '...'
         else:
             # Fallback to truncated document if RAG fails
-            context = processed_markdown[:800] + "..." if len(processed_markdown) > 800 else processed_markdown
+            context = processed_markdown[:4000] + "..." if len(processed_markdown) > 4000 else processed_markdown
 
         # Create prompt for Gemini
         prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
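The first hunk shows only the new chunk_document signature and the first two lines of its body. For readers following along, here is a minimal sketch of an overlapping word-window chunker consistent with that signature; the loop body is illustrative (assuming chunk_size and overlap are word counts), not the committed implementation:

def chunk_document_sketch(text, chunk_size=1024, overlap=100):
    """Illustrative only: split text into word windows of chunk_size words with `overlap` shared words."""
    words = text.split()
    chunks = []
    step = max(1, chunk_size - overlap)  # each new window starts `overlap` words before the previous one ends
    for start in range(0, len(words), step):
        chunk = " ".join(words[start:start + chunk_size])
        if chunk:
            chunks.append(chunk)
        if start + chunk_size >= len(words):
            break
    return chunks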
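Similarly, the retrieve_relevant_chunks hunks show the signature, the fallback, the encode call, and the final return, but not the ranking step in between. A hedged sketch of a typical cosine-similarity top-k selection that fits those surrounding lines (the sklearn import and the explicit embedding_model parameter are assumptions, added only to keep the example self-contained):

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity  # assumption: library choice is not shown in the diff

def retrieve_relevant_chunks_sketch(question, chunks, embeddings, embedding_model, top_k=3):
    """Illustrative only: rank chunks by cosine similarity between question and chunk embeddings."""
    question_embedding = embedding_model.encode([question], show_progress_bar=False)
    similarities = cosine_similarity(question_embedding, embeddings)[0]
    top_indices = np.argsort(similarities)[::-1][:top_k]  # indices of the top_k most similar chunks
    return [chunks[i] for i in top_indices]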
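The new truncation block (lines 716-719 of the updated file) trims the joined context to roughly 4,000 characters and tries to end on a full sentence. A standalone reproduction with made-up sample text shows the effect:

context = "First sentence about the paper. " * 300  # made-up sample, ~9,600 characters
if len(context) > 4000:
    sentences = context[:4000].split('.')
    context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:4000] + '...'
print(len(context))   # close to 4000
print(context[-20:])  # ends at a complete sentence followed by '...'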