Update app.py
app.py CHANGED
@@ -18,13 +18,13 @@ def get_text_from_url(url):
     visible_texts = filter(tag_visible, texts)
     return "\n".join(t.strip() for t in visible_texts)
 
-# Pre-fetch and truncate homepage text to
+# Pre-fetch and truncate homepage text to reduce prompt length
 text_list = []
 homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
 extensions = ["", "pmrf-profile-page"]
 for ext in extensions:
     full_text = get_text_from_url(homepage_url + ext)
-    truncated_text = full_text[:1000] #
+    truncated_text = full_text[:1000] # using first 1000 characters to keep prompt short
     text_list.append(truncated_text)
 
 SYSTEM_MESSAGE = (
@@ -32,8 +32,9 @@ SYSTEM_MESSAGE = (
     "Context: " + " ".join(text_list)
 )
 
-#
-
+# Switch to a model optimized for low-latency CPU inference.
+# Here we use a GPT4All model (assuming one is available via the Inference API).
+client = InferenceClient("nomic-ai/gpt4all-lora")
 
 def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
             max_tokens=100, temperature=0.7, top_p=0.95):
@@ -43,7 +44,7 @@ def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSA
         messages.append({"role": "assistant", "content": "Answer: " + a})
     messages.append({"role": "user", "content": message})
     try:
-        #
+        # Use streaming mode to return tokens as they are generated
         response_stream = client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -70,9 +71,7 @@ with demo:
     gr.ChatInterface(
         fn=respond,
         # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
-        additional_inputs=[
-            # You can add extra Gradio components here if needed.
-        ],
+        additional_inputs=[],
     )
 
 if __name__ == "__main__":
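For reference, the first hunk's context shows only the last two lines of `get_text_from_url`. A minimal sketch of what that helper and its `tag_visible` filter presumably look like, following the common `requests` + BeautifulSoup visible-text pattern; everything above the two shown lines is an assumption, not part of this commit:

```python
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment

def tag_visible(element):
    # Skip text nodes that live inside non-rendered tags or HTML comments
    if element.parent.name in ("style", "script", "head", "title", "meta", "[document]"):
        return False
    if isinstance(element, Comment):
        return False
    return True

def get_text_from_url(url):
    # Hypothetical body; only the last two lines appear in the diff context
    html = requests.get(url, timeout=10).text
    soup = BeautifulSoup(html, "html.parser")
    texts = soup.find_all(string=True)
    visible_texts = filter(tag_visible, texts)
    return "\n".join(t.strip() for t in visible_texts)
```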
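The third hunk is cut off inside the `client.chat_completion(...)` call. A sketch of how the full streaming call and its consumer loop might look, reusing `client` and `SYSTEM_MESSAGE` from the diff; the `stream=True` flag, the delta-accumulation loop, and the error fallback are assumptions beyond what the hunk shows:

```python
def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
            max_tokens=100, temperature=0.7, top_p=0.95):
    # Rebuild the chat transcript expected by the Inference API
    messages = [{"role": "system", "content": system_message}]
    for q, a in history:
        messages.append({"role": "user", "content": q})
        messages.append({"role": "assistant", "content": "Answer: " + a})
    messages.append({"role": "user", "content": message})
    partial = ""
    try:
        # Use streaming mode to return tokens as they are generated
        response_stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,  # assumption: streaming enabled, matching the comment in the diff
        )
        for chunk in response_stream:
            # Each streamed chunk carries an incremental piece of the reply
            partial += chunk.choices[0].delta.content or ""
            yield partial  # Gradio re-renders the growing string on every yield
    except Exception as e:
        yield f"Error: {e}"
```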
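Finally, the last hunk implies the file's tail looks roughly like this; `demo = gr.Blocks()` and the `demo.launch()` call under the `__main__` guard are assumptions, since the diff shows only `with demo:` in the hunk header and ends at the guard:

```python
import gradio as gr

demo = gr.Blocks()  # assumed; the hunk header only shows "with demo:"
with demo:
    gr.ChatInterface(
        fn=respond,
        # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
        additional_inputs=[],  # no extra inputs after this commit
    )

if __name__ == "__main__":
    demo.launch()  # assumed launch call; not visible in the diff
```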