raksama19 committed
Commit 3c6cd0f · verified · 1 Parent(s): e2605d2

Update app.py

Files changed (1)
app.py +25 -8
app.py CHANGED
@@ -708,13 +708,16 @@ with gr.Blocks(
         if model is None:
             return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]

-        # Use RAG to get relevant chunks from markdown
+        # Use RAG to get relevant chunks from markdown (smaller chunks for quota)
         if document_chunks and len(document_chunks) > 0:
-            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings)
+            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=2)
             context = "\n\n".join(relevant_chunks)
+            # Limit context size to avoid quota issues
+            if len(context) > 1000:
+                context = context[:1000] + "..."
         else:
             # Fallback to truncated document if RAG fails
-            context = processed_markdown[:2000] + "..." if len(processed_markdown) > 2000 else processed_markdown
+            context = processed_markdown[:800] + "..." if len(processed_markdown) > 800 else processed_markdown

         # Create prompt for Gemini
         prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
@@ -726,12 +729,26 @@ Question: {message}

 Please provide a clear and helpful answer based on the context provided."""

-        # Generate response using Gemini API
-        response = model.generate_content(prompt)
+        # Generate response using Gemini API with retry logic
+        import time
+        max_retries = 2

-        response_text = response.text if hasattr(response, 'text') else str(response)
-
-        return history + [[message, response_text]]
+        for attempt in range(max_retries):
+            try:
+                response = model.generate_content(prompt)
+                response_text = response.text if hasattr(response, 'text') else str(response)
+                return history + [[message, response_text]]
+            except Exception as api_error:
+                if "429" in str(api_error) and attempt < max_retries - 1:
+                    # Rate limit hit, wait and retry
+                    time.sleep(3)
+                    continue
+                else:
+                    # Other error or final attempt failed
+                    if "429" in str(api_error):
+                        return history + [[message, "❌ API quota exceeded. Please wait a moment and try again, or check your Gemini API billing."]]
+                    else:
+                        raise api_error

     except Exception as e:
         error_msg = f"❌ Error generating response: {str(e)}"
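
For reference, the change trades answer context for quota headroom (fewer retrieved chunks via top_k=2, a 1,000-character context cap) and wraps the Gemini call in a simple retry. That retry logic can be read as the standalone sketch below, a minimal version assuming the same google-generativeai model object (model.generate_content) and the same string-based "429" check used in the diff; the helper name generate_with_retry and its parameters are illustrative, not part of the commit:

import time

def generate_with_retry(model, prompt, max_retries=2, backoff_seconds=3):
    # Mirrors the commit's pattern: retry only when the error message
    # contains "429" (rate limit); re-raise anything else immediately.
    for attempt in range(max_retries):
        try:
            response = model.generate_content(prompt)
            # Some response objects expose .text; fall back to str() otherwise.
            return response.text if hasattr(response, "text") else str(response)
        except Exception as api_error:
            rate_limited = "429" in str(api_error)
            if rate_limited and attempt < max_retries - 1:
                time.sleep(backoff_seconds)  # brief pause before retrying
                continue
            if rate_limited:
                # Final attempt also rate-limited: surface a friendly message.
                return "❌ API quota exceeded. Please wait a moment and try again."
            raise

The fixed 3-second sleep and two attempts match the diff; exponential backoff (for example, doubling backoff_seconds on each attempt) would be the usual next step if 429s persist.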