mrfakename committed
Commit b73f20c · verified · 1 Parent(s): 098f37b

Delete app.py

Files changed (1)
  1. app.py +0 -112
app.py DELETED
@@ -1,112 +0,0 @@
- import gradio as gr
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
- from threading import Thread
- import spaces
-
- class ChatInterface:
-     def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"):
-         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-         self.model = AutoModelForCausalLM.from_pretrained(
-             model_name,
-             torch_dtype=torch.float16,
-             device_map="auto"
-         )
-
-     def format_chat_prompt(self, message, history, system_message):
-         messages = [{"role": "system", "content": system_message}]
-
-         for user_msg, assistant_msg in history:
-             if user_msg:
-                 messages.append({"role": "user", "content": user_msg})
-             if assistant_msg:
-                 messages.append({"role": "assistant", "content": assistant_msg})
-
-         messages.append({"role": "user", "content": message})
-
-         # Format messages according to model's expected chat template
-         prompt = self.tokenizer.apply_chat_template(
-             messages,
-             tokenize=False,
-             add_generation_prompt=True
-         )
-         return prompt
-
-     @spaces.GPU
-     def generate_response(
-         self,
-         message,
-         history,
-         system_message,
-         max_tokens,
-         temperature,
-         top_p,
-     ):
-         prompt = self.format_chat_prompt(message, history, system_message)
-         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-
-         # Setup streamer
-         streamer = TextIteratorStreamer(
-             self.tokenizer,
-             timeout=10.0,
-             skip_prompt=True,
-             skip_special_tokens=True
-         )
-
-         # Generate in a separate thread to enable streaming
-         generation_kwargs = dict(
-             **inputs,  # unpack input_ids and attention_mask; passing the whole BatchEncoding as `inputs=` breaks generate()
-             streamer=streamer,
-             max_new_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             do_sample=True,
-         )
-
-         thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
-         thread.start()
-
-         # Stream the response
-         response = ""
-         for new_text in streamer:
-             response += new_text
-             yield response
-
- def create_demo():
-     chat_interface = ChatInterface()
-
-     demo = gr.ChatInterface(
-         chat_interface.generate_response,
-         additional_inputs=[
-             gr.Textbox(
-                 value="You are a friendly Chatbot.",
-                 label="System message"
-             ),
-             gr.Slider(
-                 minimum=1,
-                 maximum=2048,
-                 value=512,
-                 step=1,
-                 label="Max new tokens"
-             ),
-             gr.Slider(
-                 minimum=0.1,
-                 maximum=4.0,
-                 value=0.7,
-                 step=0.1,
-                 label="Temperature"
-             ),
-             gr.Slider(
-                 minimum=0.1,
-                 maximum=1.0,
-                 value=0.95,
-                 step=0.05,
-                 label="Top-p (nucleus sampling)"
-             ),
-         ],
-     )
-     return demo
-
- if __name__ == "__main__":
-     demo = create_demo()
-     demo.launch()