Spaces:
Sleeping
Sleeping
from PyPDF2 import PdfReader | |
from google import genai | |
import time | |
import os | |
# Set up authentication: the Gemini API key is read from the environment
# (os.getenv yields None when GEMINI_API_KEY is unset).
api_key = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=api_key)

# Read in the source documents.
acled = PdfReader("ACLED.pdf")
dame = PdfReader("DAME.pdf")
nansen = PdfReader("Nansen.pdf")


def _extract_text(reader):
    """Return the text of every page of *reader*, concatenated in page order."""
    # One join instead of growing a string with += once per page.
    return "".join(page.extract_text() for page in reader.pages)


# Extract text from the documents.
acled_text = _extract_text(acled)
dame_text = _extract_text(dame)
nansen_text = _extract_text(nansen)

# Place the extracted texts into a single list, in a fixed order.
documents = [acled_text, dame_text, nansen_text]
def retriever(prompt, history):
    """
    Answer a query from the pre-loaded documents, streaming the reply.

    The text of all documents is joined and placed ahead of the question in
    a single prompt sent to the "gemini-2.0-flash" model. The reply is then
    re-emitted word by word.

    Args:
        prompt: A string representing the search query.
        history: A placeholder representing query history; it is not used.

    Yields:
        The reply accumulated so far, growing by one word per yield. Words
        are separated by single spaces (any original whitespace, including
        newlines, is collapsed) and the text ends with a trailing space.
        Nothing is yielded when the model returns no text.
    """
    context = "\n\n".join(documents)
    super_prompt = f"{context}\n\nRetrieve information:\n\nQuestion: {prompt}"
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=super_prompt,
    )

    # response.text can be None when the reply carries no text parts; treat
    # that as an empty reply instead of failing on None.split().
    reply = response.text or ""

    stream_text = ""
    for word in reply.split():
        stream_text += word + " "
        # Brief pause between words -- presumably to simulate typing in the UI.
        time.sleep(0.01)
        yield stream_text