Ifeanyi committed on
Commit
06ce591
·
verified ·
1 Parent(s): 91c9599

Update retriever.py

Browse files
Files changed (1) hide show
  1. retriever.py +53 -53
retriever.py CHANGED
@@ -1,53 +1,53 @@
1
- from google import genai
2
- import time
3
-
4
- # set up authentication
5
- api_key = userdata.get("GEMINI_API_KEY")
6
- client = genai.Client(api_key=api_key)
7
-
8
- # read in documents
9
- acled = PdfReader("/content/sample_data/ACLED.pdf")
10
- dame = PdfReader("/content/sample_data/DAME.pdf")
11
- nansen = PdfReader("/content/sample_data/Nansen.pdf")
12
-
13
- # extract text from documents
14
- acled_text = ""
15
-
16
- dame_text = ""
17
-
18
- nansen_text = ""
19
-
20
- for page in acled.pages:
21
- acled_text += page.extract_text()
22
-
23
- for page in dame.pages:
24
- dame_text += page.extract_text()
25
-
26
- for page in nansen.pages:
27
- nansen_text += page.extract_text()
28
-
29
- # place extracted texts into a single list
30
- documents = [acled_text, dame_text, nansen_text]
31
-
32
- def retriever(prompt, history):
33
- """
34
- Retrieves information from pre-loaded documents.
35
- Args:
36
- prompt: A string representing the search query
37
- history: A placeholder representing query history
38
-
39
- Returns:
40
- Search results in natural language.
41
- """
42
- context = "\n\n".join(documents)
43
- super_prompt = f"{context}\n\nRetrieve information:\n\nQuestion: {prompt}"
44
-
45
- response = client.models.generate_content(
46
- model="gemini-2.0-flash",
47
- contents=super_prompt)
48
-
49
- stream_text = ""
50
- for each in response.text.split():
51
- stream_text += each + " "
52
- time.sleep(0.01)
53
- yield stream_text
 
1
+ from google import genai
2
+ import time
3
+
4
+ # set up authentication
5
+ api_key = userdata.get("GEMINI_API_KEY")
6
+ client = genai.Client(api_key=api_key)
7
+
8
+ # read in documents
9
+ acled = PdfReader("ACLED.pdf")
10
+ dame = PdfReader("DAME.pdf")
11
+ nansen = PdfReader("Nansen.pdf")
12
+
13
+ # extract text from documents
14
+ acled_text = ""
15
+
16
+ dame_text = ""
17
+
18
+ nansen_text = ""
19
+
20
+ for page in acled.pages:
21
+ acled_text += page.extract_text()
22
+
23
+ for page in dame.pages:
24
+ dame_text += page.extract_text()
25
+
26
+ for page in nansen.pages:
27
+ nansen_text += page.extract_text()
28
+
29
+ # place extracted texts into a single list
30
+ documents = [acled_text, dame_text, nansen_text]
31
+
32
+ def retriever(prompt, history):
33
+ """
34
+ Retrieves information from pre-loaded documents.
35
+ Args:
36
+ prompt: A string representing the search query
37
+ history: A placeholder representing query history
38
+
39
+ Returns:
40
+ Search results in natural language.
41
+ """
42
+ context = "\n\n".join(documents)
43
+ super_prompt = f"{context}\n\nRetrieve information:\n\nQuestion: {prompt}"
44
+
45
+ response = client.models.generate_content(
46
+ model="gemini-2.0-flash",
47
+ contents=super_prompt)
48
+
49
+ stream_text = ""
50
+ for each in response.text.split():
51
+ stream_text += each + " "
52
+ time.sleep(0.01)
53
+ yield stream_text