Update app.py
app.py
CHANGED
```diff
@@ -18,13 +18,13 @@ def get_text_from_url(url):
     visible_texts = filter(tag_visible, texts)
     return "\n".join(t.strip() for t in visible_texts)
 
-# Pre-fetch and truncate homepage text to
+# Pre-fetch and truncate homepage text to reduce the prompt length.
 text_list = []
 homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
 extensions = ["", "pmrf-profile-page"]
 for ext in extensions:
     full_text = get_text_from_url(homepage_url + ext)
-    truncated_text = full_text[:1000] #
+    truncated_text = full_text[:1000] # using only the first 1000 characters per extension
     text_list.append(truncated_text)
 
 SYSTEM_MESSAGE = (
@@ -32,8 +32,8 @@ SYSTEM_MESSAGE = (
     "Context: " + " ".join(text_list)
 )
 
-#
-client = InferenceClient("
+# Use a model that is both lightweight and includes a proper chat configuration.
+client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
 
 def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
             max_tokens=100, temperature=0.7, top_p=0.95):
@@ -43,7 +43,7 @@ def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
         messages.append({"role": "assistant", "content": "Answer: " + a})
         messages.append({"role": "user", "content": message})
     try:
-        #
+        # Enable streaming mode to receive output faster.
         response_stream = client.chat_completion(
             messages,
             max_tokens=max_tokens,
```
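For context, `tag_visible` and the rest of `get_text_from_url` sit above the first hunk and are unchanged by this commit. A minimal sketch of the usual BeautifulSoup visible-text pattern that the two context lines imply; the request handling and the exact filter rules here are assumptions, not part of the diff:

```python
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment

def tag_visible(element):
    # Drop text nodes inside non-visible containers and HTML comments
    # (filter rules assumed; only the call site appears in the diff).
    if element.parent.name in ("style", "script", "head", "title", "meta", "[document]"):
        return False
    if isinstance(element, Comment):
        return False
    return True

def get_text_from_url(url):
    # Fetch the page and keep only human-visible text nodes.
    body = requests.get(url, timeout=10).text
    soup = BeautifulSoup(body, "html.parser")
    texts = soup.find_all(string=True)
    visible_texts = filter(tag_visible, texts)
    return "\n".join(t.strip() for t in visible_texts)
```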
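The final hunk cuts off after `max_tokens=max_tokens`. A hedged sketch of how the streamed completion might be requested and relayed to Gradio, assuming huggingface_hub's `stream=True` mode of `chat_completion`; the hypothetical `stream_answer` helper, the `temperature`/`top_p` wiring, and the yield loop are illustrative, not taken from the commit:

```python
from huggingface_hub import InferenceClient

client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")

def stream_answer(messages, max_tokens=100, temperature=0.7, top_p=0.95):
    # Request a streamed chat completion; chunks arrive as they are generated
    # instead of one blocking response.
    response_stream = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )
    answer = ""
    for chunk in response_stream:
        # Each streamed chunk carries only the newly generated piece of text.
        answer += chunk.choices[0].delta.content or ""
        yield answer  # Gradio re-renders the partial answer on every yield
```

Here `messages` is the same OpenAI-style list of `{"role": ..., "content": ...}` dicts that `respond` builds from the system message and chat history.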