svjack committed
Commit e1e1bf8 · 1 Parent(s): 6f17125

Create app.py

Files changed (1):
  1. app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
import gradio as gr
from llm_rs import AutoModel, SessionConfig, GenerationConfig, Precision, KnownModels

repo_name = "svjack/ggml"
file_name = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"

examples = [
    "Explain the meaning of the word Ottoman",
    "Explain the meaning of 👨",
    "Use the following emojis to generate a short description of a scene; the emojis are 👨👩🔥❄️",
    "Use the following emojis to generate a short description of a scene; the emojis are 🌲🔥👨💦",
]

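# Keep threads and batch size small: the public Space runs on just 2 CPU cores.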
session_config = SessionConfig(threads=2, batch_size=2)
model = AutoModel.from_pretrained(
    repo_name,
    model_file=file_name,
    session_config=session_config,
    verbose=True,
)

'''
model_path = "/Users/svjack/Library/Application Support/nomic.ai/GPT4All/wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"
model = AutoModel.from_pretrained(model_path,
                                  model_type=KnownModels.Llama)
'''

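# Build an Alpaca-style prompt around the instruction, then stream tokens,
# yielding the accumulating text so Gradio can update the output live.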
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
Answer:"""
    generation_config = GenerationConfig(
        seed=seed,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    response = ""
    streamer = model.stream(prompt=prompt, generation_config=generation_config)
    for new_text in streamer:
        response += new_text
        yield response

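# Gradio UI: an instruction box, sampling controls, and a Markdown output
# that receives the streamed response.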
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    gr.Markdown(
        """<h1><center> WizardLM-13B on CPU in Rust 🦀</center></h1>

This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) on 2 CPU cores.
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of the top-k tokens; set to 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )

                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
        with gr.Row():
            submit = gr.Button("Submit")
        with gr.Row():
            with gr.Box():
                gr.Markdown("**WizardLM-13B**")
                output_7b = gr.Markdown()

    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )

    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )

demo.queue(max_size=4, concurrency_count=1).launch(debug=True)
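
For quick local testing without the UI, the same llm-rs calls can be exercised from a plain script. The sketch below is hypothetical (it is not part of this commit) and assumes the same model file and the llm-rs API already used in app.py. Note that gr.Box and queue(concurrency_count=...) are Gradio 3.x APIs, so the Space presumably pins gradio below 4.0.

from llm_rs import AutoModel, SessionConfig, GenerationConfig

# Same weights and session settings as app.py.
session_config = SessionConfig(threads=2, batch_size=2)
model = AutoModel.from_pretrained(
    "svjack/ggml",
    model_file="wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin",
    session_config=session_config,
)

# A fixed prompt in the same Alpaca-style template that process_stream builds.
prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Explain the meaning of the word Ottoman
### Response:
Answer:"""

generation_config = GenerationConfig(
    seed=42, temperature=0.8, top_p=0.95, top_k=40, max_new_tokens=256
)

# Print tokens as they arrive, mirroring what the Space streams into the UI.
for token in model.stream(prompt=prompt, generation_config=generation_config):
    print(token, end="", flush=True)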