disLodge committed
Commit f20eb59 · verified · 1 Parent(s): c26f9f0

Switching from zephyr to gpt turbo

Files changed (1):
app.py +42 -29
app.py CHANGED
@@ -5,47 +5,52 @@ from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace
from langchain_core.runnables import RunnablePassthrough, Runnable
from io import BytesIO
+ from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
- from huggingface_hub import InferenceClient
+ # from huggingface_hub import InferenceClient
import logging
+ import os

# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)

- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
- class HuggingFaceInterferenceClientRunnable(Runnable):
-     def __init__(self, client, max_tokens=512, temperature=0.7, top_p=0.95):
-         self.client = client
-         self.max_tokens = max_tokens
-         self.temperature = temperature
-         self.top_p = top_p
-
-     def invoke(self, input, config=None):
-         prompt = input.to_messages()[0].content
-         messages = [{"role": "user", "content": prompt}]
-
-         response = ""
-         for part in self.client.chat_completion(
-             messages,
-             max_tokens=self.max_tokens,
-             stream=True,
-             temperature=self.temperature,
-             top_p=self.top_p
-         ):
-             token = part.choices[0].delta.content
-             if token:
-                 response += token
-
-         return response
-
-     def update_params(self, max_tokens, temperature, top_p):
-         self.max_tokens = max_tokens
-         self.temperature=temperature
-         self.top_p=top_p
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "sk-proj-...")
+
+
+ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+ # class HuggingFaceInterferenceClientRunnable(Runnable):
+ #     def __init__(self, client, max_tokens=512, temperature=0.7, top_p=0.95):
+ #         self.client = client
+ #         self.max_tokens = max_tokens
+ #         self.temperature = temperature
+ #         self.top_p = top_p
+
+ #     def invoke(self, input, config=None):
+ #         prompt = input.to_messages()[0].content
+ #         messages = [{"role": "user", "content": prompt}]
+
+ #         response = ""
+ #         for part in self.client.chat_completion(
+ #             messages,
+ #             max_tokens=self.max_tokens,
+ #             stream=True,
+ #             temperature=self.temperature,
+ #             top_p=self.top_p
+ #         ):
+ #             token = part.choices[0].delta.content
+ #             if token:
+ #                 response += token
+
+ #         return response
+
+ #     def update_params(self, max_tokens, temperature, top_p):
+ #         self.max_tokens = max_tokens
+ #         self.temperature=temperature
+ #         self.top_p=top_p


def extract_pdf_text(url: str) -> str:
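A note on the key handling added above: the new OPENAI_API_KEY line falls back to a literal key when the environment variable is absent, which publishes the secret in the repository history and hides misconfiguration. A minimal fail-fast sketch, not part of the commit, assuming the key is supplied via the Space's secrets:

import os

# Read the key from the environment only; never commit a fallback value.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Fail at startup so a missing secret is caught immediately.
    raise RuntimeError("OPENAI_API_KEY is not set in the environment")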
@@ -70,7 +75,13 @@ vectorstore = Chroma.from_documents(
)
retriever = vectorstore.as_retriever()

- llm = HuggingFaceInterferenceClientRunnable(client)
+ llm = ChatOpenAI(
+     model="gpt-3.5-turbo",
+     api_key=OPENAI_API_KEY,
+     max_tokens=512,
+     temperature=0.7,
+     top_p=0.95
+ )

# Before RAG chain
before_rag_template = "What is {topic}"
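Why the swap above needs no wrapper: ChatOpenAI is itself a LangChain Runnable, so unlike the deleted InferenceClient class it plugs directly into a prompt | llm | parser pipeline, and its message output is unwrapped by StrOutputParser. A minimal self-contained sketch with the same parameter values as the diff (the API key is read from the OPENAI_API_KEY environment variable by default):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Same generation settings as the committed code.
llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=512, temperature=0.7, top_p=0.95)
prompt = ChatPromptTemplate.from_template("What is {topic}")
chain = prompt | llm | StrOutputParser()

# chain.invoke({"topic": "Hugging Face"}) returns a plain string.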
@@ -103,7 +114,9 @@ after_rag_chain = (

def process_query(role, system_message, max_tokens, temperature, top_p):

-     llm.update_params(max_tokens, temperature, top_p)
+     llm.max_tokens = max_tokens
+     llm.temperature = temperature
+     llm.top_p = top_p

    # Before RAG
    before_rag_result = before_rag_chain.invoke({"topic": "Hugging Face"})
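One behavioural change in the hunk above: update_params on the old wrapper is replaced by mutating the shared llm's attributes inside process_query, so every request rewrites global state and concurrent Gradio sessions can overwrite each other's settings. A hedged alternative sketch, assuming the file builds its chains from a prompt object (before_rag_prompt is a hypothetical name, not from the diff), binds the per-request parameters instead of mutating the model:

def process_query(role, system_message, max_tokens, temperature, top_p):
    # .bind() returns a new Runnable with these kwargs fixed for the call,
    # leaving the module-level llm untouched.
    tuned_llm = llm.bind(max_tokens=max_tokens, temperature=temperature, top_p=top_p)
    # Hypothetical: rebuild the chain around the tuned model.
    chain = before_rag_prompt | tuned_llm | StrOutputParser()
    return chain.invoke({"topic": "Hugging Face"})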
 