Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,66 +2,48 @@ import gradio as gr
|
|
2 |
from huggingface_hub import InferenceClient
|
3 |
from collections import defaultdict
|
4 |
|
5 |
-
# Initialize
|
6 |
client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
-
session_histories = defaultdict(list) # Stores chat history per session
|
11 |
-
|
12 |
-
def format_chat_history(history, system_message):
|
13 |
-
"""Formats history into a single string in Alpaca/LLaMA style."""
|
14 |
-
chat_str = f"{system_message}\n\n" # Start with system message
|
15 |
|
|
|
|
|
|
|
16 |
for user_msg, bot_response in history:
|
17 |
-
chat_str += f"
|
18 |
-
|
19 |
-
return chat_str # Return formatted conversation history
|
20 |
-
|
21 |
-
def extract_keywords(text):
|
22 |
-
"""Extracts simple keywords from user input."""
|
23 |
-
words = text.lower().split()
|
24 |
-
common_words = {"the", "is", "a", "and", "to", "of", "in", "it", "you", "for"} # Ignore common words
|
25 |
-
return [word for word in words if word not in common_words]
|
26 |
|
27 |
def respond(message, history, system_message, max_tokens, temperature, top_p):
|
28 |
-
session_id = id(history) # Unique ID
|
29 |
-
session_history = session_histories[session_id] # Retrieve
|
30 |
|
31 |
-
#
|
32 |
-
|
33 |
-
|
34 |
-
user_preferences[kw] += 1
|
35 |
|
36 |
-
#
|
37 |
-
formatted_input = format_chat_history(session_history, system_message) + f"### Instruction:\n{message}\n\n### Response:\n"
|
38 |
-
|
39 |
-
# Send request (fix: ensure input is a single string)
|
40 |
response = client.text_generation(
|
41 |
-
|
42 |
max_new_tokens=max_tokens,
|
43 |
temperature=temperature,
|
44 |
top_p=top_p,
|
45 |
)
|
46 |
|
47 |
-
#
|
48 |
-
cleaned_response = response.
|
49 |
|
50 |
-
#
|
51 |
session_history.append((message, cleaned_response))
|
52 |
|
53 |
-
|
54 |
-
most_asked = max(user_preferences, key=user_preferences.get, default=None)
|
55 |
-
if most_asked and most_asked in message.lower():
|
56 |
-
cleaned_response += f"\n\nNaona unapenda mada ya '{most_asked}' sana! Unataka kujua zaidi?"
|
57 |
-
|
58 |
-
return cleaned_response # ✅ Fixed: Returns only the final response
|
59 |
|
60 |
-
#
|
61 |
demo = gr.ChatInterface(
|
62 |
respond,
|
63 |
additional_inputs=[
|
64 |
-
gr.Textbox(value="
|
65 |
gr.Slider(minimum=1, maximum=2048, value=250, step=1, label="Max new tokens"),
|
66 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
67 |
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
|
|
|
2 |
from huggingface_hub import InferenceClient
|
3 |
from collections import defaultdict
|
4 |
|
5 |
+
# Initialize model client
# NOTE(review): the repo id points at a .gguf file name — InferenceClient
# normally takes a model repo id; confirm this endpoint actually serves
# text-generation (the Space shows "Runtime error").
client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")

# Memory storage
# Maps a session key -> list of (user_msg, bot_response) turn pairs.
# defaultdict(list) so a first lookup creates an empty history implicitly.
session_histories = defaultdict(list)  # Stores full chat history per session
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
def format_chat_history(history):
    """Render (user, assistant) turn pairs as a LLaMA-style transcript.

    Args:
        history: Iterable of ``(user_msg, bot_response)`` pairs.

    Returns:
        A single string of alternating ``User: ...`` / ``AI: ...`` lines
        with no trailing newline; ``""`` for an empty history.
    """
    # str.join instead of += in a loop: linear time, and there is no
    # trailing newline to trim. The final .strip() preserves the original
    # function's whitespace handling exactly.
    lines = [f"User: {user_msg}\nAI: {bot_response}"
             for user_msg, bot_response in history]
    return "\n".join(lines).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate one chat reply, conditioning the model on prior turns.

    Args:
        message: Latest user message.
        history: Gradio-supplied list of ``(user, assistant)`` turn pairs.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to sample.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    # Bug fix: id(history) is NOT a stable session key — CPython reuses ids
    # after garbage collection and Gradio passes a fresh history list each
    # call, so the keyed store could lose turns or collide across sessions.
    # Prefer the Gradio-supplied `history` (authoritative for this session);
    # keep session_histories as a backward-compatible mirror.
    session_history = session_histories[id(history)]
    conversation = history if history else session_history

    # Build a single string prompt (text_generation expects one string).
    formatted_history = format_chat_history(conversation)
    full_input = f"{system_message}\n\n{formatted_history}\nUser: {message}\nAI:"

    # Generate response
    response = client.text_generation(
        full_input,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    # Extract & clean response
    cleaned_response = response.strip()

    # Mirror this turn into module-level storage for backward compatibility.
    session_history.append((message, cleaned_response))

    return cleaned_response
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
+
# Gradio Chat Interface
|
43 |
demo = gr.ChatInterface(
|
44 |
respond,
|
45 |
additional_inputs=[
|
46 |
+
gr.Textbox(value="You are an AI assistant that remembers previous conversations.", label="System message"),
|
47 |
gr.Slider(minimum=1, maximum=2048, value=250, step=1, label="Max new tokens"),
|
48 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
49 |
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
|