bainskarman committed
Commit 626f22e · verified · 1 Parent(s): a1fd273

Update app.py

Files changed (1)
  1. app.py +83 -41
app.py CHANGED
@@ -3,29 +3,25 @@ import os
 import requests
 from langdetect import detect
 from PyPDF2 import PdfReader
-
-# Load the Hugging Face token from environment variables (secrets)
-token = os.environ.get("Key2")  # Replace "KEY2" with your secret key name
-
-# Function to query the Hugging Face API
-def query_huggingface_api(prompt, max_new_tokens=1000, temperature=0.7, top_k=50):
-    model_name = "HuggingFaceH4/zephyr-7b-alpha"  # Replace with your preferred model
-    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
-    headers = {"Authorization": f"Bearer {token}"}
-    payload = {
-        "inputs": prompt,
-        "parameters": {
-            "max_new_tokens": max_new_tokens,
-            "temperature": temperature,
-            "top_k": top_k,
-        },
+import replicate  # For interacting with Llama models hosted on Replicate
+
+# Load the Replicate API token from environment variables
+replicate_api_token = os.environ.get("Key2")  # Replace with your Replicate API token
+
+# Function to query the Llama 3.2 7B Instruct model via Replicate
+def query_llama_model(prompt, max_new_tokens=1000, temperature=0.7, top_k=50):
+    model_name = "meta/llama-3-7b-instruct"  # Replace with the correct model name on Replicate
+    input_data = {
+        "prompt": prompt,
+        "max_new_tokens": max_new_tokens,
+        "temperature": temperature,
+        "top_k": top_k,
     }
-    response = requests.post(api_url, headers=headers, json=payload)
-    if response.status_code == 200:
-        return response.json()[0]["generated_text"]
-    else:
-        st.error(f"Error: {response.status_code} - {response.text}")
-        return None
+    response = replicate.run(
+        model_name,
+        input=input_data
+    )
+    return "".join(response)  # Replicate returns a generator, so we join it into a single string
 
 # Function to detect language
 def detect_language(text):
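
Review note: the token is read into replicate_api_token but never passed to replicate.run(), which resolves credentials from the REPLICATE_API_TOKEN environment variable, so the call will fail unless that variable happens to be set. Also, Llama 3 Instruct is published in 8B and 70B sizes, so the "meta/llama-3-7b-instruct" slug (and the "Llama 3.2 7B" comment) likely needs correcting. A minimal sketch of two ways to wire the token, assuming the secret is stored under "Key2" as in this commit and that meta/meta-llama-3-8b-instruct is the intended model:

import os
import replicate

# Option 1: re-export the secret under the name the replicate client expects.
os.environ["REPLICATE_API_TOKEN"] = os.environ.get("Key2", "")

# Option 2: pass the token explicitly through a client instance.
client = replicate.Client(api_token=os.environ.get("Key2"))
output = client.run(
    "meta/meta-llama-3-8b-instruct",  # assumed slug; verify on replicate.com
    input={"prompt": "Hello", "max_new_tokens": 64},
)
print("".join(output))  # the model output arrives as an iterator of strings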
@@ -48,6 +44,20 @@ def extract_text_from_pdf(pdf_file):
             })
     return text_data
 
+# Function to search for query in PDF content
+def search_pdf_content(pdf_text_data, query):
+    results = []
+    for entry in pdf_text_data:
+        if query.lower() in entry["content"].lower():
+            results.append(entry)
+    return results
+
+# Function to split text into chunks
+def split_text_into_chunks(text, chunk_size=500):
+    words = text.split()
+    chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
+    return chunks
+
 # Default system prompts for each query translation method
 DEFAULT_SYSTEM_PROMPTS = {
     "Multi-Query": """You are an AI language model assistant. Your task is to generate five
@@ -120,33 +130,65 @@ def main():
         formatted_prompt = system_prompt.format(question=prompt)
         st.write("**Formatted System Prompt:**", formatted_prompt)
 
-        # Query the Hugging Face API for query translation
-        translated_queries = query_huggingface_api(formatted_prompt, max_new_tokens, temperature, top_k)
+        # Query the Llama model for query translation
+        translated_queries = query_llama_model(formatted_prompt, max_new_tokens, temperature, top_k)
         if translated_queries:
-            st.write("**Translated Queries:**", translated_queries)
+            st.write("**Translated Queries:**")
+            st.write(translated_queries.split("\n")[-1])  # Print only the updated question part
 
     # Indexing
     if st.button("Apply Indexing"):
         st.write(f"**Applied Indexing Method:** {indexing_method}")
-        # Implement indexing logic here
-        # Example: Indexing with ColBERT
-        if indexing_method == "ColBERT":
-            st.write("Indexing with ColBERT...")
-
-    # Query the Hugging Face API for final response
+        if pdf_file is not None:
+            # Extract and search PDF content
+            pdf_text_data = extract_text_from_pdf(pdf_file)
+            search_results = search_pdf_content(pdf_text_data, prompt)
+
+            if search_results:
+                st.write("**Relevant Content from PDF:**")
+                for result in search_results:
+                    st.write(f"**Page {result['page']}, Line {result['line']}:** {result['content']}")
+
+                # Split text into chunks
+                chunks = split_text_into_chunks("\n".join([result["content"] for result in search_results]))
+                st.write("**Chunks Obtained from PDF:**")
+                for i, chunk in enumerate(chunks):
+                    st.write(f"**Chunk {i + 1}:** {chunk}")
+
+                # Print summary of split for Multi-Representation
+                if indexing_method == "Multi-Representation":
+                    st.write("**Summary of Split:**")
+                    summary = query_llama_model(f"Summarize the following text:\n{chunks[0]}", max_new_tokens, temperature, top_k)
+                    st.write(summary)
+            else:
+                st.write("**No relevant content found in the PDF.**")
+        else:
+            st.write("**No PDF uploaded.**")
+
+    # Generate Response
     if st.button("Generate Response"):
-        response = query_huggingface_api(prompt, max_new_tokens, temperature, top_k)
+        if pdf_file is not None:
+            # Extract and search PDF content
+            pdf_text_data = extract_text_from_pdf(pdf_file)
+            search_results = search_pdf_content(pdf_text_data, prompt)
+
+            if search_results:
+                st.write("**Relevant Content from PDF:**")
+                for result in search_results:
+                    st.write(f"**Page {result['page']}, Line {result['line']}:** \"{result['content']}\"")
+
+                # Generate response based on PDF content
+                pdf_context = "\n".join([result["content"] for result in search_results])
+                response = query_llama_model(f"Based on the following context:\n{pdf_context}\n\nAnswer this question: {prompt}", max_new_tokens, temperature, top_k)
+            else:
+                st.write("**No relevant content found in the PDF. Generating response without PDF context.**")
+                response = query_llama_model(prompt, max_new_tokens, temperature, top_k)
+        else:
+            st.write("**No PDF uploaded. Generating response without PDF context.**")
+            response = query_llama_model(prompt, max_new_tokens, temperature, top_k)
+
         if response:
             st.write("**Response:**", response)
 
-    # Process PDF content if uploaded
-    if pdf_file is not None:
-        pdf_text_data = extract_text_from_pdf(pdf_file)
-        if prompt:
-            # Search for relevant content in the PDF
-            for entry in pdf_text_data:
-                if prompt.lower() in entry["content"].lower():
-                    st.write(f"**Page {entry['page']}, Line {entry['line']}:** {entry['content']}")
-
 if __name__ == "__main__":
     main()
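
Review note: the Generate Response branch now follows a simple retrieval-augmented pattern (match lines, join them into a context block, prepend the question), but both buttons duplicate the extract-and-search step. One possible refactor, not part of this commit, that names the two shared pieces:

def retrieve_context(pdf_file, query):
    # Shared retrieval step: parse the PDF once, then substring-filter by query.
    pdf_text_data = extract_text_from_pdf(pdf_file)
    return search_pdf_content(pdf_text_data, query)

def build_rag_prompt(context_entries, question):
    # Same prompt template the commit uses: matched lines, then the question.
    context = "\n".join(entry["content"] for entry in context_entries)
    return f"Based on the following context:\n{context}\n\nAnswer this question: {question}"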
 