disLodge committed
Commit 0152be5 · verified · 1 parent: f84ae09

Moved back to a mistral model

Files changed (1)
  1. app.py +42 -32
app.py CHANGED
@@ -10,47 +10,60 @@ from langchain_core.output_parsers import StrOutputParser
  from langchain_core.documents import Document
  from langchain_core.prompts import ChatPromptTemplate
  from langchain.text_splitter import CharacterTextSplitter
- # from huggingface_hub import InferenceClient
+ from huggingface_hub import InferenceClient
  import logging
  import os

  # logging.basicConfig(level=logging.INFO)
  # logger = logging.getLogger(__name__)

- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "sk-proj-********")
+ lo = "hf_****************"
+ ve = "****************"
+ half = lo + ve
+ HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", half)
+ client = InferenceClient(
+     model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+     token=HF_TOKEN,
+     provider="hf-inference"
+ )


- # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+ class HuggingFaceInterferenceClientRunnable(Runnable):
+     def __init__(self, client, max_tokens=512, temperature=0.7, top_p=0.95):
+         self.client = client
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+         self.top_p = top_p

- # class HuggingFaceInterferenceClientRunnable(Runnable):
- #     def __init__(self, client, max_tokens=512, temperature=0.7, top_p=0.95):
- #         self.client = client
- #         self.max_tokens = max_tokens
- #         self.temperature = temperature
- #         self.top_p = top_p
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=4, max=10),
+         retry=retry_if_exception_type((requests.exceptions.ConnectionError, requests.exceptions.Timeout))
+     )

- #     def invoke(self, input, config=None):
- #         prompt = input.to_messages()[0].content
- #         messages = [{"role": "user", "content": prompt}]
+     def invoke(self, input, config=None):
+         prompt = input.to_messages()[0].content
+         messages = [{"role": "user", "content": prompt}]

- #         response = ""
- #         for part in self.client.chat_completion(
- #             messages,
- #             max_tokens=self.max_tokens,
- #             stream=True,
- #             temperature=self.temperature,
- #             top_p=self.top_p
- #         ):
- #             token = part.choices[0].delta.content
- #             if token:
- #                 response += token
+         response = ""
+         for part in self.client.chat_completion(
+             messages,
+             max_tokens=self.max_tokens,
+             stream=True,
+             temperature=self.temperature,
+             top_p=self.top_p
+         ):
+             for choice in part.choices:
+                 token = choice.delta.content
+                 if token:
+                     response += token

- #         return response
+         return response

- #     def update_params(self, max_tokens, temperature, top_p):
- #         self.max_tokens = max_tokens
- #         self.temperature = temperature
- #         self.top_p = top_p
+     def update_params(self, max_tokens, temperature, top_p):
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+         self.top_p = top_p

  def extract_pdf_text(url: str) -> str:
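The @retry decorator added above, and the requests exception types it catches, are not imported anywhere in this hunk, and the unchanged lines 1-9 of app.py are not shown. The decorator arguments match tenacity's API, so the top of the file would presumably need imports along these lines; this is a sketch of an assumption, not part of the commit:

import requests
# tenacity supplies the retry/backoff helpers used by the decorator on invoke()
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
# Runnable is the LangChain base class that HuggingFaceInterferenceClientRunnable subclasses
from langchain_core.runnables import Runnable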
 
@@ -109,10 +122,7 @@ after_rag_chain = (

  def process_query(role, system_message, max_tokens, temperature, top_p):

-     llm.max_tokens = max_tokens
-     llm.temperature = temperature
-     llm.top_p = top_p
-
+     llm.update_params(max_tokens, temperature, top_p)

      # After RAG
      after_rag_result = after_rag_chain.invoke({"role": role})
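The middle of app.py (the prompt and chain construction between the two hunks) is unchanged and not shown; the second hunk header only reveals that an after_rag_chain = ( expression exists around line 109. A minimal sketch of how the pieces shown here could fit together, assuming llm is a module-level instance of the new Runnable and with the prompt text invented for illustration:

# Hypothetical wiring (not part of this commit): wrap the InferenceClient so it
# can sit inside a LangChain pipeline, then compose the post-RAG chain.
llm = HuggingFaceInterferenceClientRunnable(client)

after_rag_prompt = ChatPromptTemplate.from_template(
    "You are a {role}. Answer based on the indexed PDF content."
)
after_rag_chain = (
    after_rag_prompt
    | llm
    | StrOutputParser()
)

Because process_query now calls llm.update_params(max_tokens, temperature, top_p) rather than assigning the three attributes one by one, the UI settings are applied to the shared llm instance in a single place before after_rag_chain.invoke({"role": role}) runs.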