abhi1nandy2 committed
Commit 6b7515b · verified · 1 Parent(s): 717d3b5

Update app.py

Files changed (1)
  1. app.py +7 -8
app.py CHANGED

@@ -18,13 +18,13 @@ def get_text_from_url(url):
     visible_texts = filter(tag_visible, texts)
     return "\n".join(t.strip() for t in visible_texts)
 
-# Pre-fetch and truncate homepage text to keep the prompt short
+# Pre-fetch and truncate homepage text to reduce prompt length
 text_list = []
 homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
 extensions = ["", "pmrf-profile-page"]
 for ext in extensions:
     full_text = get_text_from_url(homepage_url + ext)
-    truncated_text = full_text[:1000]  # use only the first 1000 characters
+    truncated_text = full_text[:1000]  # using first 1000 characters to keep prompt short
     text_list.append(truncated_text)
 
 SYSTEM_MESSAGE = (
@@ -32,8 +32,9 @@ SYSTEM_MESSAGE = (
     "Context: " + " ".join(text_list)
 )
 
-# Use the GPTQ version that includes the tokenizer configuration
-client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
+# Switch to a model optimized for low-latency CPU inference.
+# Here we use a GPT4All model (assuming one is available via the Inference API).
+client = InferenceClient("nomic-ai/gpt4all-lora")
 
 def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
             max_tokens=100, temperature=0.7, top_p=0.95):
@@ -43,7 +44,7 @@ def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSA
         messages.append({"role": "assistant", "content": "Answer: " + a})
     messages.append({"role": "user", "content": message})
     try:
-        # Enable streaming mode to start receiving output faster.
+        # Use streaming mode to return tokens as they are generated
        response_stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
@@ -70,9 +71,7 @@ with demo:
     gr.ChatInterface(
         fn=respond,
         # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
-        additional_inputs=[
-            # You can add extra Gradio components here if needed.
-        ],
+        additional_inputs=[],
     )
 
 if __name__ == "__main__":
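
Note on the model swap in the second hunk: the new comment itself only assumes that nomic-ai/gpt4all-lora is served by the Inference API. One way to guard against that assumption is a cheap probe with a fallback to the checkpoint this commit removes. The sketch below is illustrative, not part of the commit; make_client is a hypothetical helper, and the probe strategy is an assumption.

# Illustrative fallback, not in the commit: probe the new model once and
# fall back to the previously used checkpoint if the endpoint rejects it.
from huggingface_hub import InferenceClient

PRIMARY = "nomic-ai/gpt4all-lora"                    # added by this commit
FALLBACK = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"  # removed by this commit

def make_client():
    client = InferenceClient(PRIMARY)
    try:
        # A one-token completion fails fast if the model is not deployed.
        client.chat_completion([{"role": "user", "content": "ping"}], max_tokens=1)
        return client
    except Exception:
        return InferenceClient(FALLBACK)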
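
On the streaming change in the third hunk: the diff cuts off before the stream=True flag and the loop that consumes the stream. Below is a minimal sketch of how the streamed completion could be consumed, assuming the standard huggingface_hub streaming interface; stream_answer is a hypothetical helper and only the model id and sampling parameters come from the diff.

# Hypothetical helper illustrating the streaming call; not part of the commit.
from huggingface_hub import InferenceClient

client = InferenceClient("nomic-ai/gpt4all-lora")  # model id from the diff

def stream_answer(messages, max_tokens=100, temperature=0.7, top_p=0.95):
    response = ""
    # stream=True makes chat_completion yield partial chunks instead of
    # returning one finished completion object.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += chunk.choices[0].delta.content or ""
        yield response  # each partial answer replaces the last one in the UI

Yielding the growing response string is the usual generator pattern gr.ChatInterface expects, so partial answers render as tokens arrive rather than after the full completion.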