arshiaafshani committed
Commit 6d40614 · verified · 1 Parent(s): 623da4d

Update app.py

Files changed (1): app.py (+41, -74)
app.py CHANGED
@@ -1,95 +1,62 @@
 import gradio as gr
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
-from llama_cpp_agent.providers import LlamaCppPythonProvider
-from llama_cpp_agent.chat_history import BasicChatHistory
-from llama_cpp_agent.chat_history.messages import UserMessage, AssistantMessage
-
-hf_hub_download(
-    repo_id="mradermacher/Arsh-llm-GGUF",
-    filename="Arsh-llm.Q4_K_M.gguf",
-    local_dir="./models"
-)
-
-llm = Llama(
-    model_path="./models/Arsh-llm.Q4_K_M.gguf",
-    n_batch=512,
-    n_ctx=8192,
-    verbose=False
-)
-
-provider = LlamaCppPythonProvider(llm)
-
-agent = LlamaCppAgent(
-    provider,
-    system_prompt="You are Arsh, a helpful assistant by Arshia Afshani. You should answer the user carefully.",
-    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
-    debug_output=False
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
+
+# Load model and tokenizer
+model_name = "Arsh-llm"  # your model name on Hugging Face
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
+
+# Create pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=0 if torch.cuda.is_available() else -1
 )
 
 def respond(message, chat_history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    if chat_history is None:
-        chat_history = []
-
-    messages = BasicChatHistory()
-
-    for msg in chat_history:
-        if msg["role"] == "user":
-            messages.add_message(UserMessage(msg["content"]))
-        elif msg["role"] == "assistant":
-            messages.add_message(AssistantMessage(msg["content"]))
-
-    stream = agent.get_chat_response(
-        message,
-        chat_history=messages,
-        llm_sampling_settings=settings,
-        returns_streaming_generator=True,
-        print_output=False
+    # Prepare prompt
+    prompt = f"{system_message}\n\nUser: {message}\nAssistant:"
+
+    # Generate response
+    output = pipe(
+        prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repeat_penalty,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id
     )
-
-    response = ""
-    for token in stream:
-        response += token
-        yield response
+
+    response = output[0]['generated_text'].split("Assistant:")[-1].strip()
+
+    # Update chat history
+    chat_history.append((message, response))
+
+    return "", chat_history
 
 with gr.Blocks() as demo:
     gr.Markdown("# Arsh-LLM Demo")
 
     with gr.Row():
         with gr.Column():
-            system_msg = gr.Textbox("You are Arsh, a helpful assistant by Arshia Afshani. You should answer the user carefully.", label="System Message", interactive=True)
+            system_msg = gr.Textbox("You are Arsh, a helpful assistant by Arshia Afshani. You should answer the user carefully.",
+                                    label="System Message", interactive=True)
             max_tokens = gr.Slider(1, 4096, value=2048, step=1, label="Max Tokens")
             temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
             top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
             top_k = gr.Slider(0, 100, value=40, step=1, label="Top-k")
             repeat_penalty = gr.Slider(0.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
 
-    chatbot = gr.Chatbot(
-        bubble_full_width=False,
-        show_copy_button=True
-    )
-
-    chat_interface = gr.ChatInterface(
-        respond,
-        additional_inputs=[
-            system_msg,
-            max_tokens,
-            temperature,
-            top_p,
-            top_k,
-            repeat_penalty
-        ],
-        chatbot=chatbot
-    )
+    chatbot = gr.Chatbot(bubble_full_width=False, show_copy_button=True)
+    msg = gr.Textbox(label="Your Message")
+    clear = gr.Button("Clear")
+
+    msg.submit(respond, [msg, chatbot, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty], [msg, chatbot])
+    clear.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
     demo.launch()
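
One behavioral change worth flagging: the removed llama-cpp version streamed partial responses to the UI (the old respond() yielded tokens as they arrived), while the new pipeline() call returns only the finished text. Also note that "Arsh-llm" without a namespace will only resolve if a local directory by that name exists; from_pretrained usually needs a fully qualified hub id (user/repo), which the translated comment marks as a placeholder. Below is a minimal sketch of how streaming could be restored with transformers' TextIteratorStreamer. It assumes the model and tokenizer globals defined in the new app.py; the function name respond_streaming and its wiring are illustrative assumptions, not part of this commit.

from threading import Thread

from transformers import TextIteratorStreamer

def respond_streaming(message, chat_history, system_message,
                      max_tokens, temperature, top_p, top_k, repeat_penalty):
    # Hypothetical streaming variant; same prompt format as the committed respond()
    prompt = f"{system_message}\n\nUser: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt=True keeps the echoed prompt out of the streamed text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # model.generate() blocks, so run it in a worker thread and
    # consume the streamer on this one
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repeat_penalty,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        response += new_text
        # Yield a (textbox, history) pair to match msg.submit's [msg, chatbot] outputs
        yield "", chat_history + [(message, response)]
    thread.join()

Because msg.submit's outputs are [msg, chatbot], the generator yields a fresh (textbox, history) pair for each chunk, and Gradio re-renders the chatbot as each partial response arrives.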