AndreAugustoDev committed on
Commit
1914f9b
·
verified ·
1 Parent(s): 4cf9d72
Files changed (6)
  1. Dockerfile +14 -0
  2. README.md +3 -3
  3. app.py +104 -0
  4. bot.png +0 -0
  5. requirements.txt +9 -0
  6. user.png +0 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.9
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY . .
+
+CMD ["python", "app.py"]
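Because `app.py` starts its own Gradio server (there is no ASGI `app.main:app` module for uvicorn to import), the container runs the script directly. On Docker Spaces the server must listen on `0.0.0.0:7860` to be reachable from outside the container. A minimal sketch of the launch such an entrypoint performs (the `demo` interface here is a hypothetical placeholder, not the real app):

```python
# Minimal sketch: a Gradio app reachable from outside the container.
# Gradio's default bind address (127.0.0.1) would leave a Docker Space unreachable.
import gradio as gr

demo = gr.Interface(fn=lambda text: text[::-1], inputs="text", outputs="text")
demo.launch(server_name="0.0.0.0", server_port=7860)
```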
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: Oci Ds 33b
-emoji: 💻
+title: OpenCodeInterpreter DS 33B
+emoji: 🧠
 colorFrom: indigo
 colorTo: gray
 sdk: docker
@@ -8,4 +8,4 @@ pinned: false
 license: mit
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
app.py ADDED
@@ -0,0 +1,104 @@
+import gradio as gr
+import torch
+from threading import Thread
+from typing import Iterator
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+MAX_MAX_NEW_TOKENS = 1024
+MAX_INPUT_TOKEN_LENGTH = 2048
+
+# base_model_name = "m-a-p/OpenCodeInterpreter-DS-6.7B"
+base_model_name = "m-a-p/OpenCodeInterpreter-DS-33B"
+model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.bfloat16, device_map="cpu")
+
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+def format_prompt(message, history):
+    system_prompt = "You are OpenCodeInterpreter, you are an expert programmer that helps to write code based on the user request, with concise explanations."
+    prompt = [{"role": "system", "content": system_prompt}]
+    for user_prompt, bot_response in history:
+        prompt.extend([{"role": "user", "content": user_prompt}, {"role": "assistant", "content": bot_response}])
+    prompt.append({"role": "user", "content": message})
+    return prompt
+
+def generate(prompt: str, history: list[tuple[str, str]], max_new_tokens: int = 1024, temperature: float = 0.3,
+             top_p: float = 0.9, top_k: int = 50, repetition_penalty: float = 1.0) -> Iterator[str]:
+
+    # Clamp temperature away from zero so sampling never divides by zero.
+    temperature = max(float(temperature), 1e-2)
+
+    formatted_prompt = format_prompt(prompt, history)
+
+    input_ids = tokenizer.apply_chat_template(formatted_prompt, return_tensors="pt", add_generation_prompt=True)
+    # Keep only the most recent tokens if the conversation exceeds the context budget.
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+    input_ids = input_ids.to(model.device)
+
+    streamer = TextIteratorStreamer(tokenizer, timeout=15.0, skip_prompt=True, skip_special_tokens=True)
+    # do_sample must be True, otherwise temperature/top_p/top_k are silently ignored.
+    generation_kwargs = dict(input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens, do_sample=True, top_p=top_p, top_k=top_k,
+                             temperature=temperature, num_beams=1, repetition_penalty=repetition_penalty, eos_token_id=tokenizer.eos_token_id)
+
+    # generate() blocks until done, so run it in a worker thread and stream tokens as they arrive.
+    t = Thread(target=model.generate, kwargs=generation_kwargs)
+    t.start()
+
+    outputs = []
+    for chunk in streamer:
+        outputs.append(chunk)
+        yield "".join(outputs).replace("<|EOT|>", "")
+
+oci_chatbot = gr.Chatbot(layout="bubble", avatar_images=["user.png", "bot.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True)
+
+additional_inputs = [
+    gr.Slider(
+        label="Max new tokens",
+        minimum=1,
+        maximum=MAX_MAX_NEW_TOKENS,
+        step=1,
+        value=512,
+    ),
+    gr.Slider(
+        label="Temperature",
+        minimum=0,
+        maximum=1.0,
+        step=0.1,
+        value=0.3,
+    ),
+    gr.Slider(
+        label="Top-p",
+        minimum=0.05,
+        maximum=1.0,
+        step=0.05,
+        value=0.9,
+    ),
+    gr.Slider(
+        label="Top-k",
+        minimum=1,
+        maximum=1000,
+        step=1,
+        value=50,
+    ),
+    gr.Slider(
+        label="Repetition penalty",
+        minimum=1.0,
+        maximum=2.0,
+        step=0.05,
+        value=1.0,
+    )]
+
+iface = gr.ChatInterface(fn=generate,
+                         chatbot=oci_chatbot,
+                         additional_inputs=additional_inputs,
+                         description="Running on CPU. Responses may be slow in CPU environments. 🙏🏻",
+                         retry_btn=None,
+                         undo_btn=None
+                         )
+
+with gr.Blocks() as main:
+    gr.HTML("<center><h1>Tomoniai's Chat with OpenCodeInterpreter</h1></center>")
+    iface.render()
+
+# Bind 0.0.0.0:7860 so the server is reachable from outside the Docker container.
+main.queue(max_size=10).launch(show_api=False, server_name="0.0.0.0", server_port=7860)
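The thread-plus-streamer pattern in `generate` is the standard transformers recipe for token streaming: `model.generate` blocks until completion, so it runs in a worker thread while the main thread drains the `TextIteratorStreamer` iterator. A minimal self-contained sketch of the same pattern (using `gpt2` purely for illustration so it runs quickly on CPU; the app itself uses the OpenCodeInterpreter checkpoint):

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Tiny model purely for illustration; swap in any causal LM checkpoint.
tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("def fibonacci(n):", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until done, so it runs in a background thread while
# the main thread consumes decoded text chunks from the streamer iterator.
thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=32))
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
```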
bot.png ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
+accelerate==0.23.0
+bitsandbytes==0.41.1
+gradio==3.48.0
+protobuf==3.20.3
+scipy==1.11.2
+sentencepiece==0.1.99
+spaces==0.16.1
+torch==2.0.0
+transformers==4.36.2
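Since every dependency is pinned, one way to confirm a built image actually matches these pins is to query installed versions at runtime; a small sketch (package names taken from the list above, intended to run inside the container):

```python
# Print installed versions of the pinned packages to confirm the image
# matches requirements.txt.
import importlib.metadata as metadata

for pkg in ("accelerate", "gradio", "torch", "transformers"):
    print(f"{pkg}=={metadata.version(pkg)}")
```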
user.png ADDED