Ifeanyi committed on
Commit
86ba978
·
verified ·
1 Parent(s): 7f8604d

Upload retriever.py

Browse files
Files changed (1) hide show
  1. retriever.py +53 -0
retriever.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google import genai
2
+ import time
3
+
# NOTE(review): `userdata` and `PdfReader` are used below but are not imported
# in the visible part of this file -- presumably they come from the notebook
# environment / a PDF library; confirm and add the imports where this runs.

# Authenticate against the Gemini API with the key held in the secret store.
api_key = userdata.get("GEMINI_API_KEY")
client = genai.Client(api_key=api_key)

# Open the three source PDFs.
acled = PdfReader("/content/sample_data/ACLED.pdf")
dame = PdfReader("/content/sample_data/DAME.pdf")
nansen = PdfReader("/content/sample_data/Nansen.pdf")

# Concatenate the text of every page, in page order, one string per document.
acled_text = "".join(page.extract_text() for page in acled.pages)
dame_text = "".join(page.extract_text() for page in dame.pages)
nansen_text = "".join(page.extract_text() for page in nansen.pages)

# Collect the extracted texts into a single list used as retrieval context.
documents = [acled_text, dame_text, nansen_text]
32
+ def retriever(prompt, history):
33
+ """
34
+ Retrieves information from pre-loaded documents.
35
+ Args:
36
+ prompt: A string representing the search query
37
+ history: A placeholder representing query history
38
+
39
+ Returns:
40
+ Search results in natural language.
41
+ """
42
+ context = "\n\n".join(documents)
43
+ super_prompt = f"{context}\n\nRetrieve information:\n\nQuestion: {prompt}"
44
+
45
+ response = client.models.generate_content(
46
+ model="gemini-2.0-flash",
47
+ contents=super_prompt)
48
+
49
+ stream_text = ""
50
+ for each in response.text.split():
51
+ stream_text += each + " "
52
+ time.sleep(0.01)
53
+ yield stream_text