Shreyas094 committed
Commit a3a121b · verified · 1 Parent(s): ee9e2d5

Update app.py

Files changed (1)
  1. app.py +30 -32
app.py CHANGED
@@ -18,12 +18,6 @@ from huggingface_hub import InferenceClient
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
 
-# Initialize the InferenceClient
-client = InferenceClient(
-    "meta-llama/Meta-Llama-3.1-8B-Instruct",
-    token=huggingface_token,  # Use your environment variable for the token
-)
-
 # Initialize LlamaParse
 llama_parser = LlamaParse(
     api_key=llama_cloud_api_key,
@@ -33,7 +27,7 @@ llama_parser = LlamaParse(
     language="en",
 )
 
-def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
+def load_document(file: NamedTemporaryFile, parser: str = "pypdf") -> List[Document]:
     """Loads and splits the document into pages."""
     if parser == "pypdf":
         loader = PyPDFLoader(file.name)
@@ -76,34 +70,38 @@ def update_vectors(files, parser):
 
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
 
-def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.3, repetition_penalty=1.1):
+def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.2, repetition_penalty=1.1):
+    client = InferenceClient(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        token=huggingface_token,
+    )
+
     full_response = ""
+    messages = [{"role": "user", "content": prompt}]
+
     for _ in range(max_chunks):
-        response = client.chat_completion(
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=max_tokens,
-            temperature=temperature,
-            repetition_penalty=repetition_penalty,
-            stream=False,
-        )
-
-        if response and "choices" in response and len(response["choices"]) > 0:
-            chunk = response["choices"][0]["message"]["content"]
-
-            # Remove any part of the chunk that's already in full_response
-            new_content = chunk[len(full_response):].strip()
-
-            if not new_content:
-                break  # No new content, so we're done
-
-            full_response += new_content
+        try:
+            chunk_response = ""
+            for message in client.chat_completion(
+                messages=messages,
+                max_new_tokens=max_tokens,
+                temperature=temperature,
+                repetition_penalty=repetition_penalty,
+                stream=True,
+            ):
+                chunk = message.choices[0].delta.content
+                if chunk:
+                    chunk_response += chunk
+                    full_response += chunk
 
-            if chunk.endswith((".", "!", "?", "</s>", "[/INST]")):
+            if not chunk_response or chunk_response.endswith((".", "!", "?", "</s>", "[/INST]")):
                 break
 
-            # Update the prompt for the next iteration
-            prompt = full_response
-        else:
+            messages.append({"role": "assistant", "content": chunk_response})
+            messages.append({"role": "user", "content": "Continue"})
+
+        except Exception as e:
+            print(f"Error in generating response: {str(e)}")
             break
 
     # Clean up the response
@@ -184,11 +182,11 @@ def chatbot_interface(message, history, use_web_search, temperature, repetition_
 
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# AI-powered Web Search and PDF Chat Assistant")
+    gr.Markdown("# AI-powered Web Search and PDF Chat Assistant (Using Meta-Llama-3.1-8B-Instruct)")
 
     with gr.Row():
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
-        parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
+        parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="pypdf")
         update_button = gr.Button("Upload Document")
 
     update_output = gr.Textbox(label="Update Status")
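
For context on the main change: generate_chunked_response now builds its InferenceClient per call, streams deltas from chat_completion, and, when a round stops mid-sentence, appends the partial answer plus a "Continue" user turn before asking for more. Below is a minimal, self-contained sketch of that pattern; the function name stream_with_continue is invented for illustration, and it passes chat_completion's documented max_tokens parameter where the commit writes max_new_tokens.

import os

from huggingface_hub import InferenceClient


def stream_with_continue(prompt: str, max_chunks: int = 5, max_tokens: int = 1000) -> str:
    # Per-call client, mirroring the commit's move away from a module-level client.
    client = InferenceClient(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        token=os.environ.get("HUGGINGFACE_TOKEN"),
    )
    messages = [{"role": "user", "content": prompt}]
    full_response = ""
    for _ in range(max_chunks):
        chunk_response = ""
        # stream=True yields events whose choices[0].delta.content carries the next tokens.
        for event in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=0.2,
            stream=True,
        ):
            delta = event.choices[0].delta.content
            if delta:
                chunk_response += delta
                full_response += delta
        # Done if the model produced nothing or ended on sentence-final punctuation.
        if not chunk_response or chunk_response.endswith((".", "!", "?")):
            break
        # Otherwise extend the conversation and ask the model to keep going.
        messages.append({"role": "assistant", "content": chunk_response})
        messages.append({"role": "user", "content": "Continue"})
    return full_response

Capping the loop at max_chunks bounds cost when the model never reaches a natural stopping point, which is the same safeguard the committed function uses.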