detroitnatif committed on
Commit
786df00
·
1 Parent(s): 964e814

Adding GroqSearch Space

Browse files
Files changed (1) hide show
  1. researcher.py +100 -0
researcher.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from config import *
2
+ import os
3
+ from dotenv import load_dotenv, find_dotenv
4
+ import json
5
+ import requests
6
+ from langchain_groq import ChatGroq
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.chains import RetrievalQA
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain.document_loaders.url import UnstructuredURLLoader
11
+ from langchain.vectorstores.faiss import FAISS
12
+ from langchain_community.embeddings import HuggingFaceEmbeddings
13
+ import os
14
+ load_dotenv(find_dotenv())
15
+
16
class Researcher:
    """Answer a query by searching the web and running retrieval QA on the hits.

    The pipeline, driven by :meth:`research`, is:

    1. query the Serper search endpoint (:meth:`search_articles`);
    2. pick the result URLs to read (:meth:`get_urls`);
    3. download their content (:meth:`get_content_from_urls`);
    4. split, embed and index that content in FAISS, then ask the Groq-hosted
       LLM the question against the index (:meth:`research_given_query`).

    Tunables such as ``PROMPT_TEMPLATE``, ``CHUNK_SIZE`` or ``SEARCH_KWARGS``
    are taken from the wildcard ``config`` import at the top of the file.
    """

    def __init__(self):
        """Read the API keys from the environment and build reusable components."""
        self.serper_api_key = os.getenv("SERPER_API_KEY")
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        self.prompt_template = PromptTemplate(
            template=PROMPT_TEMPLATE,
            input_variables=INPUT_VARIABLES,
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            separators=SEPARATORS,
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP,
        )
        self.llm = ChatGroq(
            temperature=0.5,
            model_name="mixtral-8x7b-32768",
            groq_api_key=self.groq_api_key,
        )
        self.hfembeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDER,
            model_kwargs={'device': 'cpu'},
        )
        # Vector store; populated by research_given_query() for each query.
        self.db = None

    def search_articles(self, query, timeout=10):
        """Run a web search for *query* through the Serper API.

        Args:
            query: Search string sent as the ``q`` field of the request body.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` may block forever on a stalled
                connection.

        Returns:
            The decoded JSON response as a dict, or an empty dict when the
            request fails for any reason (the error is printed, not raised).
        """
        url = "https://google.serper.dev/search"
        data = json.dumps({"q": query})

        headers = {
            'X-API-KEY': self.serper_api_key,
            'Content-Type': 'application/json',
        }

        try:
            response = requests.post(
                url, headers=headers, data=data, timeout=timeout
            )
            response.raise_for_status()  # Raises an HTTPError for bad responses
            return response.json()
        except requests.exceptions.HTTPError as e:
            print(f"HTTP Error: {e}")
        except requests.exceptions.ConnectionError as e:
            print(f"Connection Error: {e}")
        except requests.exceptions.Timeout as e:
            print("Timeout Error:", e)
        except requests.exceptions.RequestException as e:
            print("Unexpected Error:", e)
        return {}  # Return an empty dict in case of failure

    def research_answerer(self):
        """Build the RetrievalQA chain over the current vector store.

        Returns:
            A ``RetrievalQA`` chain that uses ``self.llm``, the configured
            prompt and a retriever over ``self.db``.

        Raises:
            RuntimeError: If no vector store has been built yet, i.e.
                :meth:`research_given_query` has not run.
        """
        if getattr(self, "db", None) is None:
            raise RuntimeError(
                "No vector store available; call research_given_query() first."
            )

        research_qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type=CHAIN_TYPE,
            retriever=self.db.as_retriever(search_kwargs=SEARCH_KWARGS),
            return_source_documents=True,
            verbose=True,
            chain_type_kwargs={"prompt": self.prompt_template},
        )
        return research_qa_chain

    def get_urls(self, articles, max_results=3):
        """Collect the URLs worth reading from a search response.

        Args:
            articles: Dict returned by :meth:`search_articles`. It may be
                empty (failed search) or lack the ``answerBox`` / ``organic``
                keys.
            max_results: Maximum number of leading ``organic`` entries to
                consider.

        Returns:
            A list with the answer-box link first (when present), followed by
            the links of the first *max_results* organic entries. Entries
            without a link are skipped; the list is empty when nothing usable
            is found.
        """
        articles = articles or {}
        urls = []

        answer_box = articles.get("answerBox")
        if isinstance(answer_box, dict) and answer_box.get("link"):
            urls.append(answer_box["link"])

        # A failed search yields {} — treat a missing "organic" key as no hits
        # instead of raising KeyError.
        organic = articles.get("organic") or []
        for entry in organic[:max_results]:
            link = entry.get("link") if isinstance(entry, dict) else None
            if link:
                urls.append(link)
        return urls

    def get_content_from_urls(self, urls):
        """Download and parse the pages at *urls* with ``UnstructuredURLLoader``.

        Returns:
            Whatever ``loader.load()`` returns for the given URLs.
        """
        loader = UnstructuredURLLoader(urls=urls)
        research_content = loader.load()
        return research_content

    def research_given_query(self, research_objective, research_content):
        """Answer *research_objective* using only *research_content*.

        The content is split into chunks, embedded into a fresh FAISS index
        (stored on ``self.db``) and queried through the RetrievalQA chain.

        Args:
            research_objective: The question to answer.
            research_content: Loaded documents to ground the answer in.

        Returns:
            The ``"result"`` field of the chain output.

        Raises:
            ValueError: If splitting yields no chunks, since there is nothing
                to index or retrieve from.
        """
        docs = self.text_splitter.split_documents(research_content)
        if not docs:
            # Building a FAISS index from zero documents fails with an
            # unhelpful low-level error; fail early with a clear message.
            raise ValueError(
                "No content could be retrieved to research the query."
            )
        self.db = FAISS.from_documents(
            documents=docs, embedding=self.hfembeddings
        )
        bot = self.research_answerer()
        research_out = bot({"query": research_objective})
        return research_out["result"]

    def research(self, query):
        """Run the full search -> fetch -> index -> answer pipeline for *query*.

        Returns:
            The answer produced by :meth:`research_given_query`.

        Raises:
            ValueError: If the search or the page downloads produced no
                content to research.
        """
        search_articles = self.search_articles(query)
        urls = self.get_urls(search_articles)
        research_content = self.get_content_from_urls(urls)
        answer = self.research_given_query(query, research_content)
        return answer