lukalafaye committed
Commit e904228 · 1 Parent(s): 4ddaccd
Files changed (1)
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
+import streamlit as st
+from llama_cpp_cuda_tensorcores import Llama
+from huggingface_hub import hf_hub_download
+import spaces
+
+# Constants
+REPO_ID = "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF"
+MODEL_NAME = "Meta-Llama-3-70B-Instruct.Q3_K_L.gguf"
+MAX_CONTEXT_LENGTH = 8192
+CUDA = True
+SYSTEM_PROMPT = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."
+TOKEN_STOP = ["<|eot_id|>"]
+
+# Llama 3 chat-template fragments; the literal SYSTEM_PROMPT and USER_PROMPT
+# placeholders are swapped for real text when the prompt is built.
+SYS_MSG = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nSYSTEM_PROMPT<|eot_id|>\n"
+USER_PROMPT = (
+    "<|start_header_id|>user<|end_header_id|>\n\nUSER_PROMPT<|eot_id|>\n"
+)
+ASSIS_PROMPT = "<|start_header_id|>assistant<|end_header_id|>\n\n"
+END_ASSIS_PREVIOUS_RESPONSE = "<|eot_id|>\n"
+
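+# For orientation, the prompt these fragments assemble for one exchange looks
+# roughly like this (illustrative sketch; the real string is built by
+# apply_chat_template below, with \n rendered here as line breaks):
+#
+#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
+#
+#   You are a helpful, smart, kind, and efficient AI assistant. ...<|eot_id|>
+#   <|start_header_id|>user<|end_header_id|>
+#
+#   Hello!<|eot_id|>
+#   <|start_header_id|>assistant<|end_header_id|>
+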
+# Maps a task name to its system prompt; only "Assistant" is defined for now.
+TASK_PROMPT = {
+    "Assistant": SYSTEM_PROMPT,
+}
+
+# ChatLLM class for handling the chat
+class ChatLLM:
+    def __init__(self, config_model):
+        self.llm = None
+        self.config_model = config_model
+
+    def load_cpp_model(self):
+        self.llm = Llama(**self.config_model)
+
+    def apply_chat_template(self, history, system_message):
+        history = history or []
+        messages = SYS_MSG.replace("SYSTEM_PROMPT", system_message.strip())
+        for msg in history:
+            messages += (
+                USER_PROMPT.replace("USER_PROMPT", msg[0]) + ASSIS_PROMPT + msg[1]
+            )
+            # Close a completed assistant turn; the final (empty) slot stays
+            # open so generation continues from the assistant header.
+            messages += END_ASSIS_PREVIOUS_RESPONSE if msg[1] else ""
+        return messages
+
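+# E.g. (illustrative): apply_chat_template([["Hi", ""]], "Be brief.") returns
+# the system block, one user turn, and a trailing open assistant header for
+# the model to complete; a non-empty msg[1] would instead be closed with
+# <|eot_id|> so the next turn can follow it.
+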
+    @spaces.GPU(duration=120)
+    def response(
+        self,
+        history,
+        system_message,
+        max_tokens,
+        temperature,
+        top_p,
+        top_k,
+        repeat_penalty,
+    ):
+        messages = self.apply_chat_template(history, system_message)
+
+        # Reset the pending assistant slot, then stream tokens into it.
+        history[-1][1] = ""
+
+        if not self.llm:
+            print("Loading model")
+            self.load_cpp_model()
+
+        for output in self.llm(
+            messages,
+            echo=False,
+            stream=True,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            repeat_penalty=repeat_penalty,
+            stop=TOKEN_STOP,
+        ):
+            answer = output["choices"][0]["text"]
+            history[-1][1] += answer
+
+        return history
+
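+# Rough standalone usage sketch (path and sampling values are illustrative):
+#   chat = ChatLLM({"model_path": "model.gguf", "n_ctx": 8192, "n_gpu_layers": -1})
+#   out = chat.response([["Hi!", ""]], SYSTEM_PROMPT, 64, 0.7, 0.9, 50, 1.1)
+#   print(out[-1][1])
+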
+# Download model from Hugging Face (cached locally after the first run)
+model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME)
+
+# Model configuration
+config_model = {
+    "model_path": model_path,
+    "n_ctx": MAX_CONTEXT_LENGTH,
+    "n_gpu_layers": -1 if CUDA else 0,  # -1 offloads all layers to the GPU
+}
+
+# Instantiate the chat model (weights are loaded lazily on first response)
+llm_chat = ChatLLM(config_model)
+
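+# Note: Streamlit reruns this whole script on every interaction, which rebuilds
+# ChatLLM (and drops any loaded weights). A common alternative, sketched here
+# for illustration only, is to cache one instance across reruns:
+#
+#   @st.cache_resource
+#   def get_chat():
+#       return ChatLLM(config_model)
+#   llm_chat = get_chat()
+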
+# Streamlit UI
+st.title("AI Chat Assistant")
+
+# Initialize session state to store the chat history
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+
+if "input_text" not in st.session_state:
+    st.session_state.input_text = ""
+
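+# session_state is per-browser-session and survives reruns, so chat_history
+# accumulates across messages until it is explicitly cleared.
+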
+# Generate a response when the user submits input
+def chat_response():
+    if st.session_state.input_text.strip():
+        # User message, paired with an empty assistant slot to fill
+        history = st.session_state.chat_history
+        history.append([st.session_state.input_text, ""])
+
+        # Model response
+        history = llm_chat.response(
+            history=history,
+            system_message=SYSTEM_PROMPT,
+            max_tokens=100,  # Adjust token length as needed
+            temperature=0.7,
+            top_p=0.9,
+            top_k=50,
+            repeat_penalty=1.0,
+        )
+
+        st.session_state.chat_history = history
+        # Clearing a widget's state key is allowed inside its own callback
+        st.session_state.input_text = ""
+
+# Textbox for user input; on_change fires when the user presses Enter
+st.text_input("You: ", key="input_text", on_change=chat_response)
+
+# Display chat history
+if st.session_state.chat_history:
+    for user_msg, bot_resp in st.session_state.chat_history:
+        st.markdown(f"**You:** {user_msg}")
+        st.markdown(f"**Assistant:** {bot_resp}")
+
+# Clear chat button
+def clear_chat():
+    st.session_state.chat_history = []
+
+st.button("Clear History", on_click=clear_chat)
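+# To run locally (illustrative; assumes the llama_cpp_cuda_tensorcores package
+# and a CUDA GPU are available):
+#   streamlit run app.py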