arahman347 committed
Commit a009cfe · verified
1 Parent(s): 3b409b0

uploaded bot

Files changed (1)
  1. app.py +87 -0
app.py CHANGED
@@ -0,0 +1,87 @@
+ import gradio as gr
+ from huggingface_hub import InferenceClient  # client for the Hugging Face Inference API
+
+ # NEW LIBRARIES
+
+ from sentence_transformers import SentenceTransformer
+ import torch
+ import numpy as np
+ import os
+ ## START NEW CODE
+
+ # Load and process the knowledge base text file
+ with open("/content/knowledge.txt", "r", encoding="utf-8") as f:
+     knowledge_text = f.read()
+
+ # Split the text into chunks (for example, by paragraphs)
+ chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]
+
+ # Load an embedding model (this one is light and fast)
+ embedder = SentenceTransformer("all-MiniLM-L6-v2")
+
+ # Precompute embeddings for all chunks (as a tensor for fast similarity search)
+ chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
+
+ def get_relevant_context(query, top_k=3):
+     """
+     Compute the embedding for the query, compare it against all chunk embeddings,
+     and return the top_k most similar chunks concatenated into a context string.
+     """
+
+     # Compute and normalize the query embedding
+     query_embedding = embedder.encode(query, convert_to_tensor=True)
+     query_embedding = query_embedding / query_embedding.norm()
+
+     # Normalize chunk embeddings along the embedding dimension
+     norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+
+     # Compute cosine similarity between the query and each chunk
+     similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
+
+     # Get the indices of the top_k most similar chunks (never ask for more than exist)
+     top_k = min(top_k, len(chunks))
+     top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy()
+
+     # Concatenate the top chunks into a single context string
+     context = "\n\n".join([chunks[i] for i in top_k_indices])
+     return context
+
+ ## END OF NEW CODE
+
+ client = InferenceClient("google/gemma-2-2b-it")
+
+ def respond(message, history):
+     messages = [{"role": "system", "content": "I am a kind chatbot."}]
+
+     # NEW CODE
+     # Retrieve context relevant to the current user message
+     context = get_relevant_context(message, top_k=3)
+
+     # add all previous messages to the messages list
+     if history:
+         for user_msg, assistant_msg in history:
+             messages.append({"role": "user", "content": user_msg})
+             messages.append({"role": "assistant", "content": assistant_msg})
+
+     # add the current user's message to the messages list,
+     # with the retrieved context prepended so the model can actually use it
+     messages.append({"role": "user", "content": f"Context:\n{context}\n\nQuestion: {message}"})
+
+     # make the chat completion API call,
+     # sending the messages and other parameters to the model;
+     # streaming makes tokens appear one at a time
+     response = ""
+
+     # iterate through each chunk streamed back by the API
+     for chunk in client.chat_completion(
+         messages,
+         max_tokens=500,
+         temperature=0.1,
+         stream=True):
+
+         # add the tokens to the output content
+         token = chunk.choices[0].delta.content  # capture the most recent token
+         response += token or ""  # add it to the response (the final chunk may be empty)
+         yield response  # yield the partial response so far
+
+ chatbot = gr.ChatInterface(respond)
+
+ chatbot.launch()