disLodge committed
Commit 41ec366 · verified · 1 Parent(s): a87b35b
Files changed (1)
  1. app.py +51 -23
app.py CHANGED
@@ -17,12 +17,56 @@ import logging
 import os
 
 
-# lo = "hf_JyAJApaXhIrONPFSIo"
-# ve = "wbnJbrXViYurrsvP"
-
-last_call_time = 0
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "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")
+lo = "hf_JyAJApaXhIrONPFSIo"
+ve = "wbnJbrXViYurrsvP"
+half = lo+ve
+HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", half)
+client = InferenceClient(
+    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    token=HF_TOKEN,
+    provider="hf-inference"
+)
 
+class HuggingFaceInferenceClientRunnable(Runnable):
+    def __init__(self, client, max_tokens=512, temperature=0.7, top_p=0.95):
+        self.client = client
+        self.max_tokens = max_tokens
+        self.temperature = temperature
+        self.top_p = top_p
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=4, max=10),
+        retry=retry_if_exception_type((requests.exceptions.ConnectionError, requests.exceptions.Timeout))
+    )
+    def invoke(self, input, config=None):
+        # Extract the prompt from the input (ChatPromptTemplate output)
+        prompt = input.to_messages()[0].content
+        messages = [{"role": "user", "content": prompt}]
+
+        # Call the InferenceClient with streaming
+        response = ""
+        for parts in self.client.chat_completion(
+            messages,
+            max_tokens=self.max_tokens,
+            stream=True,
+            temperature=self.temperature,
+            top_p=self.top_p
+        ):
+            # Handle streaming response parts
+            for part in parts.choices:
+                token = part.delta.content
+                if token:
+                    response += token
+
+        return response
+
+    def update_params(self, max_tokens, temperature, top_p):
+        self.max_tokens = max_tokens
+        self.temperature = temperature
+        self.top_p = top_p
+
+
 def extract_pdf_text(url: str) -> str:
     response = requests.get(url)
     pdf_file = BytesIO(response.content)
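
The added code references InferenceClient, Runnable, requests, and the tenacity helpers; the corresponding imports sit above this hunk and are outside the diff context. A minimal sketch of what they would look like, assuming current huggingface_hub, langchain-core, and tenacity module layouts:

# Imports assumed by the added code above; they are not part of this diff.
# Module paths follow current huggingface_hub / langchain-core / tenacity releases.
import requests
from huggingface_hub import InferenceClient
from langchain_core.runnables import Runnable
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)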
@@ -45,13 +89,7 @@ vectorstore = Chroma.from_documents(
 )
 retriever = vectorstore.as_retriever()
 
-llm = ChatOpenAI(
-    model="gpt-3.5-turbo",
-    api_key=OPENAI_API_KEY,
-    max_tokens=512,
-    temperature=0.7,
-    top_p=0.95
-)
+llm = HuggingFaceInferenceClientRunnable(client)
 
 # After RAG chain
 after_rag_template = """You are a {role}. Summarize the following content for yourself and speak in terms of first person.
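
The one-line swap works because the wrapper's invoke() accepts the ChatPromptValue that a ChatPromptTemplate emits (it calls to_messages() on its input) and returns a plain string, so it can take ChatOpenAI's place in the chain. A hypothetical standalone probe of the wrapper, not part of the commit, assuming the app's llm instance and a valid HF token at runtime:

# Hypothetical smoke test for the wrapper; not in the commit.
# ChatPromptTemplate.invoke() yields a ChatPromptValue, whose
# to_messages()[0].content is what HuggingFaceInferenceClientRunnable reads.
from langchain_core.prompts import ChatPromptTemplate

probe = ChatPromptTemplate.from_template("You are a {role}. Say hello in one line.")
value = probe.invoke({"role": "pirate"})  # ChatPromptValue
print(llm.invoke(value))                  # returns the generated text as a str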
@@ -79,18 +117,8 @@ after_rag_chain = (
 
 def process_query(role, system_message, max_tokens, temperature, top_p):
 
-    global last_call_time
-    current_time = time.time()
+    llm.update_params(max_tokens, temperature, top_p)
 
-    if current_time - last_call_time < 60:
-        wait_time = int(60 - (current_time - last_call_time))
-        return f"Rate limit exceeded. Please wait {wait_time} seconds before trying again."
-    # llm.update_params(max_tokens, temperature, top_p)
-    last_call_time = current_time
-    llm.max_tokens = max_tokens
-    llm.temperature = temperature
-    llm.top_p = top_p
-
     # After RAG
     after_rag_result = after_rag_chain.invoke({"role": role})
 
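The commit trades the old manual 60-second rate limiter for the tenacity policy on invoke(): transient connection errors and timeouts are retried up to three times with exponential backoff (waits clamped between 4 and 10 seconds) rather than the call being rejected outright. The same policy in isolation, applied to a toy function for illustration:

# Illustration only: the retry policy from the diff on a standalone function.
import requests
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

@retry(
    stop=stop_after_attempt(3),                          # give up after 3 attempts
    wait=wait_exponential(multiplier=1, min=4, max=10),  # backoff clamped to 4-10 s
    retry=retry_if_exception_type(
        (requests.exceptions.ConnectionError, requests.exceptions.Timeout)
    ),
)
def fetch_text(url: str) -> str:
    # Any other exception type propagates immediately and is not retried.
    return requests.get(url, timeout=10).text

One trade-off worth noting: update_params() mutates the shared llm instance per request, so if the surrounding app serves concurrent requests, parameter settings from different callers can interleave.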