import gradio as gr
from huggingface_hub import InferenceClient
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment

def tag_visible(element):
    """Return True for text nodes that are visible on the rendered page."""
    if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
        return False
    if isinstance(element, Comment):
        return False
    return True

def get_text_from_url(url):
    """Fetch a page and return its visible text, one text node per line."""
    response = requests.get(url, timeout=10)  # fail fast rather than hang on a slow page
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    texts = soup.find_all(string=True)  # `string=True` replaces the deprecated `text=True`
    visible_texts = filter(tag_visible, texts)
    return "\n".join(t.strip() for t in visible_texts)

# Pre-fetch and truncate homepage text to keep the prompt short
text_list = []
homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
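# "" -> the homepage itself; "pmrf-profile-page" -> a subpage appended to the base URL.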
extensions = ["", "pmrf-profile-page"]
for ext in extensions:
    full_text = get_text_from_url(homepage_url + ext)
    truncated_text = full_text[:1000]  # use only the first 1000 characters
    text_list.append(truncated_text)
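# text_list now holds one truncated snippet per fetched page.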

SYSTEM_MESSAGE = (
    "You are a QA chatbot to answer queries (in less than 30 words) on my homepage. "
    "Context: " + " ".join(text_list)
)
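
# Two ~1,000-character snippets should fit comfortably inside
# TinyLlama's 2,048-token context window.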

# Use the GPTQ version that includes the tokenizer configuration
client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
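# InferenceClient calls the hosted Hugging Face Inference API, so no local GPU
# is needed; any other chat-capable model ID could be swapped in here.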

def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
            max_tokens=100, temperature=0.7, top_p=0.95):
    messages = [{"role": "system", "content": system_message}]
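    # Replay prior turns in OpenAI-style roles so the model sees the conversation.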
    for q, a in history:
        messages.append({"role": "user", "content": "Question: " + q})
        messages.append({"role": "assistant", "content": "Answer: " + a})
    messages.append({"role": "user", "content": message})
    try:
        # Enable streaming mode to start receiving output faster.
        response_stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        output = ""
        for chunk in response_stream:
            if hasattr(chunk, "choices") and chunk.choices:
                part = chunk.choices[0].message.get("content", "")
                output += part
        return output.strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        return str(e)

initial_message = [("user", "Yo who dis Abhilash?")]  # placeholder opener; not currently passed to the interface
markdown_note = "## Ask Anything About Me! (Might show a tad bit of hallucination!)"

with gr.Blocks() as demo:
    gr.Markdown(markdown_note)
    gr.ChatInterface(
        fn=respond,
        # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
        additional_inputs=[
            # Extra Gradio components (e.g., temperature sliders) could go here.
        ],
    )

if __name__ == "__main__":
    demo.launch()