Shreyas094 committed
Commit a3a121b · verified · 1 Parent(s): ee9e2d5

Update app.py

Files changed (1)
  1. app.py +30 -32
app.py CHANGED
@@ -18,12 +18,6 @@ from huggingface_hub import InferenceClient
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
 
-# Initialize the InferenceClient
-client = InferenceClient(
-    "meta-llama/Meta-Llama-3.1-8B-Instruct",
-    token=huggingface_token,  # Use your environment variable for the token
-)
-
 # Initialize LlamaParse
 llama_parser = LlamaParse(
     api_key=llama_cloud_api_key,
@@ -33,7 +27,7 @@ llama_parser = LlamaParse(
     language="en",
 )
 
-def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
+def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[Document]:
     """Loads and splits the document into pages."""
     if parser == "pypdf":
         loader = PyPDFLoader(file.name)
@@ -76,34 +70,38 @@ def update_vectors(files, parser):
 
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
 
-def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.3, repetition_penalty=1.1):
+def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.2, repetition_penalty=1.1):
+    client = InferenceClient(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        token=huggingface_token,
+    )
+
     full_response = ""
+    messages = [{"role": "user", "content": prompt}]
+
     for _ in range(max_chunks):
-        response = client.chat_completion(
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=max_tokens,
-            temperature=temperature,
-            repetition_penalty=repetition_penalty,
-            stream=False,
-        )
-
-        if response and "choices" in response and len(response["choices"]) > 0:
-            chunk = response["choices"][0]["message"]["content"]
-
-            # Remove any part of the chunk that's already in full_response
-            new_content = chunk[len(full_response):].strip()
-
-            if not new_content:
-                break  # No new content, so we're done
-
-            full_response += new_content
+        try:
+            chunk_response = ""
+            for message in client.chat_completion(
+                messages=messages,
+                max_new_tokens=max_tokens,
+                temperature=temperature,
+                repetition_penalty=repetition_penalty,
+                stream=True,
+            ):
+                chunk = message.choices[0].delta.content
+                if chunk:
+                    chunk_response += chunk
+                    full_response += chunk
 
-            if chunk.endswith((".", "!", "?", "</s>", "[/INST]")):
+            if not chunk_response or chunk_response.endswith((".", "!", "?", "</s>", "[/INST]")):
                 break
 
-            # Update the prompt for the next iteration
-            prompt = full_response
-        else:
+            messages.append({"role": "assistant", "content": chunk_response})
+            messages.append({"role": "user", "content": "Continue"})
+
+        except Exception as e:
+            print(f"Error in generating response: {str(e)}")
             break
 
     # Clean up the response
@@ -184,11 +182,11 @@ def chatbot_interface(message, history, use_web_search, temperature, repetition_
 
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# AI-powered Web Search and PDF Chat Assistant")
+    gr.Markdown("# AI-powered Web Search and PDF Chat Assistant (Using Meta-Llama-3.1-8B-Instruct)")
 
     with gr.Row():
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
-        parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
+        parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="pypdf")
         update_button = gr.Button("Upload Document")
 
     update_output = gr.Textbox(label="Update Status")
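
For context on the main change: generate_chunked_response now builds its InferenceClient per call, streams deltas from chat_completion, and, when a round stops mid-sentence, appends the partial answer plus a "Continue" user turn before asking for more. Below is a minimal, self-contained sketch of that pattern; the function name stream_with_continue is invented for illustration, and it passes chat_completion's documented max_tokens parameter where the commit writes max_new_tokens.

import os

from huggingface_hub import InferenceClient


def stream_with_continue(prompt: str, max_chunks: int = 5, max_tokens: int = 1000) -> str:
    # Per-call client, mirroring the commit's move away from a module-level client.
    client = InferenceClient(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        token=os.environ.get("HUGGINGFACE_TOKEN"),
    )
    messages = [{"role": "user", "content": prompt}]
    full_response = ""
    for _ in range(max_chunks):
        chunk_response = ""
        # stream=True yields events whose choices[0].delta.content carries the next tokens.
        for event in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=0.2,
            stream=True,
        ):
            delta = event.choices[0].delta.content
            if delta:
                chunk_response += delta
                full_response += delta
        # Done if the model produced nothing or ended on sentence-final punctuation.
        if not chunk_response or chunk_response.endswith((".", "!", "?")):
            break
        # Otherwise extend the conversation and ask the model to keep going.
        messages.append({"role": "assistant", "content": chunk_response})
        messages.append({"role": "user", "content": "Continue"})
    return full_response

Capping the loop at max_chunks bounds cost when the model never reaches a natural stopping point, which is the same safeguard the committed function uses.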