Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -708,13 +708,16 @@ with gr.Blocks(
|
|
708 |
if model is None:
|
709 |
return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
|
710 |
|
711 |
-
# Use RAG to get relevant chunks from markdown
|
712 |
if document_chunks and len(document_chunks) > 0:
|
713 |
-
relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings)
|
714 |
context = "\n\n".join(relevant_chunks)
|
|
|
|
|
|
|
715 |
else:
|
716 |
# Fallback to truncated document if RAG fails
|
717 |
-
context = processed_markdown[:
|
718 |
|
719 |
# Create prompt for Gemini
|
720 |
prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
|
@@ -726,12 +729,26 @@ Question: {message}
|
|
726 |
|
727 |
Please provide a clear and helpful answer based on the context provided."""
|
728 |
|
729 |
-
# Generate response using Gemini API
|
730 |
-
|
|
|
731 |
|
732 |
-
|
733 |
-
|
734 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
735 |
|
736 |
except Exception as e:
|
737 |
error_msg = f"❌ Error generating response: {str(e)}"
|
|
|
708 |
if model is None:
|
709 |
return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
|
710 |
|
711 |
+
# Use RAG to get relevant chunks from markdown (smaller chunks for quota)
|
712 |
if document_chunks and len(document_chunks) > 0:
|
713 |
+
relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=2)
|
714 |
context = "\n\n".join(relevant_chunks)
|
715 |
+
# Limit context size to avoid quota issues
|
716 |
+
if len(context) > 1000:
|
717 |
+
context = context[:1000] + "..."
|
718 |
else:
|
719 |
# Fallback to truncated document if RAG fails
|
720 |
+
context = processed_markdown[:800] + "..." if len(processed_markdown) > 800 else processed_markdown
|
721 |
|
722 |
# Create prompt for Gemini
|
723 |
prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
|
|
|
729 |
|
730 |
Please provide a clear and helpful answer based on the context provided."""
|
731 |
|
732 |
+
# Generate response using Gemini API with retry logic
|
733 |
+
import time
|
734 |
+
max_retries = 2
|
735 |
|
736 |
+
for attempt in range(max_retries):
|
737 |
+
try:
|
738 |
+
response = model.generate_content(prompt)
|
739 |
+
response_text = response.text if hasattr(response, 'text') else str(response)
|
740 |
+
return history + [[message, response_text]]
|
741 |
+
except Exception as api_error:
|
742 |
+
if "429" in str(api_error) and attempt < max_retries - 1:
|
743 |
+
# Rate limit hit, wait and retry
|
744 |
+
time.sleep(3)
|
745 |
+
continue
|
746 |
+
else:
|
747 |
+
# Other error or final attempt failed
|
748 |
+
if "429" in str(api_error):
|
749 |
+
return history + [[message, "❌ API quota exceeded. Please wait a moment and try again, or check your Gemini API billing."]]
|
750 |
+
else:
|
751 |
+
raise api_error
|
752 |
|
753 |
except Exception as e:
|
754 |
error_msg = f"❌ Error generating response: {str(e)}"
|