Update app.py

app.py CHANGED
@@ -18,42 +18,45 @@ def get_text_from_url(url):
     visible_texts = filter(tag_visible, texts)
     return "\n".join(t.strip() for t in visible_texts)
 
-# …
+# Pre-fetch and truncate homepage text to keep the prompt short
 text_list = []
 homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
 extensions = ["", "pmrf-profile-page"]
 for ext in extensions:
-    …
-    …
+    full_text = get_text_from_url(homepage_url + ext)
+    truncated_text = full_text[:1000]  # use only the first 1000 characters
+    text_list.append(truncated_text)
 
-# Build the system message with homepage info.
 SYSTEM_MESSAGE = (
-    "You are a QA chatbot to answer queries (in less than 30 words) on my homepage…
-    + "…
-    + "\n\n"
+    "You are a QA chatbot to answer queries (in less than 30 words) on my homepage. "
+    "Context: " + " ".join(text_list)
 )
 
-# Use the GPTQ…
+# Use the GPTQ version that includes the tokenizer configuration
 client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
 
 def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
-            max_tokens=…
+            max_tokens=100, temperature=0.7, top_p=0.95):
     messages = [{"role": "system", "content": system_message}]
-    for …
-    …
-    …
-        if len(val) >= 2:
-            messages.append({"role": "assistant", "content": "Answer: " + val[1]})
+    for q, a in history:
+        messages.append({"role": "user", "content": "Question: " + q})
+        messages.append({"role": "assistant", "content": "Answer: " + a})
     messages.append({"role": "user", "content": message})
     try:
-        …
+        # Enable streaming mode to start receiving output faster.
+        response_stream = client.chat_completion(
             messages,
             max_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
-            …
+            stream=True,
         )
-        …
+        output = ""
+        for chunk in response_stream:
+            if hasattr(chunk, "choices") and chunk.choices:
+                part = chunk.choices[0].message.get("content", "")
+                output += part
+        return output.strip()
     except Exception as e:
         print(f"An error occurred: {e}")
         return str(e)
@@ -68,7 +71,7 @@ with demo:
         fn=respond,
         # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
         additional_inputs=[
-            # …
+            # You can add extra Gradio components here if needed.
         ],
     )
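A note on the streaming loop in the first hunk: with current versions of huggingface_hub, chat_completion(..., stream=True) yields chunks whose incremental text lives in choices[0].delta.content; streamed chunks do not carry a message object, so chunk.choices[0].message.get("content", "") would raise on the first chunk and the function would return the error string instead of an answer. A minimal consumption sketch under that assumption, reusing the client and messages from the diff (other names are illustrative):

    # Sketch: consuming a streamed chat_completion from huggingface_hub.
    output = ""
    for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
        # Each streamed chunk carries an incremental piece of text in `delta`.
        delta = chunk.choices[0].delta.content
        if delta:  # the final chunk's delta content can be None
            output += delta
    print(output.strip())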
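For context on the second hunk: fn=respond and additional_inputs are gr.ChatInterface arguments, and the `with demo:` hunk header suggests the interface is built inside a gr.Blocks context. A minimal wiring sketch under those assumptions (the launch guard is illustrative and not part of the diff):

    import gradio as gr

    demo = gr.Blocks()
    with demo:
        gr.ChatInterface(
            fn=respond,  # called as respond(message, history, *additional_inputs)
            additional_inputs=[
                # Extra components (e.g. a gr.Slider bound to max_tokens) would go here.
            ],
        )

    if __name__ == "__main__":
        demo.launch()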