abhi1nandy2 committed
Commit 6b7515b · verified · 1 Parent(s): 717d3b5

Update app.py

Files changed (1)
  1. app.py +7 -8
app.py CHANGED

@@ -18,13 +18,13 @@ def get_text_from_url(url):
     visible_texts = filter(tag_visible, texts)
     return "\n".join(t.strip() for t in visible_texts)
 
-# Pre-fetch and truncate homepage text to keep the prompt short
+# Pre-fetch and truncate homepage text to reduce prompt length
 text_list = []
 homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
 extensions = ["", "pmrf-profile-page"]
 for ext in extensions:
     full_text = get_text_from_url(homepage_url + ext)
-    truncated_text = full_text[:1000]  # use only the first 1000 characters
+    truncated_text = full_text[:1000]  # using first 1000 characters to keep prompt short
     text_list.append(truncated_text)
 
 SYSTEM_MESSAGE = (
@@ -32,8 +32,9 @@ SYSTEM_MESSAGE = (
     "Context: " + " ".join(text_list)
 )
 
-# Use the GPTQ version that includes the tokenizer configuration
-client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
+# Switch to a model optimized for low-latency CPU inference.
+# Here we use a GPT4All model (assuming one is available via the Inference API).
+client = InferenceClient("nomic-ai/gpt4all-lora")
 
 def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
             max_tokens=100, temperature=0.7, top_p=0.95):
@@ -43,7 +44,7 @@ def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSA
         messages.append({"role": "assistant", "content": "Answer: " + a})
     messages.append({"role": "user", "content": message})
     try:
-        # Enable streaming mode to start receiving output faster.
+        # Use streaming mode to return tokens as they are generated
        response_stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
@@ -70,9 +71,7 @@ with demo:
     gr.ChatInterface(
         fn=respond,
         # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
-        additional_inputs=[
-            # You can add extra Gradio components here if needed.
-        ],
+        additional_inputs=[],
     )
 
 if __name__ == "__main__":
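
Note on the model swap in the second hunk: the new comment itself only assumes that nomic-ai/gpt4all-lora is served by the Inference API. One way to guard against that assumption is a cheap probe with a fallback to the checkpoint this commit removes. The sketch below is illustrative, not part of the commit; make_client is a hypothetical helper, and the probe strategy is an assumption.

# Illustrative fallback, not in the commit: probe the new model once and
# fall back to the previously used checkpoint if the endpoint rejects it.
from huggingface_hub import InferenceClient

PRIMARY = "nomic-ai/gpt4all-lora"                    # added by this commit
FALLBACK = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"  # removed by this commit

def make_client():
    client = InferenceClient(PRIMARY)
    try:
        # A one-token completion fails fast if the model is not deployed.
        client.chat_completion([{"role": "user", "content": "ping"}], max_tokens=1)
        return client
    except Exception:
        return InferenceClient(FALLBACK)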
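
On the streaming change in the third hunk: the diff cuts off before the stream=True flag and the loop that consumes the stream. Below is a minimal sketch of how the streamed completion could be consumed, assuming the standard huggingface_hub streaming interface; stream_answer is a hypothetical helper and only the model id and sampling parameters come from the diff.

# Hypothetical helper illustrating the streaming call; not part of the commit.
from huggingface_hub import InferenceClient

client = InferenceClient("nomic-ai/gpt4all-lora")  # model id from the diff

def stream_answer(messages, max_tokens=100, temperature=0.7, top_p=0.95):
    response = ""
    # stream=True makes chat_completion yield partial chunks instead of
    # returning one finished completion object.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += chunk.choices[0].delta.content or ""
        yield response  # each partial answer replaces the last one in the UI

Yielding the growing response string is the usual generator pattern gr.ChatInterface expects, so partial answers render as tokens arrive rather than after the full completion.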