import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr

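# Load the tokenizer and the 7B instruct model in half precision; device_map="auto"
# lets accelerate place the weights on available GPUs (fp16 7B weights need ~14 GB).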
model_name = "codellama/CodeLlama-7b-Instruct-hf"
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
print("Model loaded.")

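# Wrap model and tokenizer in a text-generation pipeline. A low temperature keeps
# code answers focused; a mild repetition_penalty discourages degenerate loops.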
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.1,
    top_p=0.95,
    max_new_tokens=512,
    repetition_penalty=1.05,
)

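# CodeLlama-Instruct expects each completed turn as <s>[INST] question [/INST] answer </s>.
# The final, unanswered turn must stop at [/INST] so the model continues with the
# assistant reply instead of seeing an already-closed sequence.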
def format_prompt(chat):
    prompt = ""
    for user_msg, ai_reply in chat:
        if ai_reply:
            prompt += f"<s>[INST] {user_msg.strip()} [/INST] {ai_reply.strip()} </s>"
        else:
            # In-progress turn: leave the prompt open for generation.
            prompt += f"<s>[INST] {user_msg.strip()} [/INST]"
    return prompt

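# Gradio callback: append the new user turn, generate a reply, and return the
# cleared textbox plus the updated (user, assistant) history.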
def chat_fn(user_input, history):
    history = history or []
    prompt = format_prompt(history + [[user_input, ""]])
    # return_full_text=False returns only the completion, which is more robust
    # than slicing the prompt off by character offset (retokenization can shift it).
    result = generator(prompt, do_sample=True, return_full_text=False)
    answer = result[0]["generated_text"].strip()
    history.append((user_input, answer))
    return "", history

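# Minimal Blocks UI: a chat window, a textbox that submits on Enter, and a
# button that resets both the textbox and the conversation.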
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 CodeLlama Copilot\nFree & private code assistant.")
    # chat_fn builds (user, assistant) pairs, so the Chatbot must use the
    # tuple history format; type="messages" would expect role/content dicts.
    chatbot = gr.Chatbot(label="Developer Assistant", height=400)
    with gr.Row():
        msg = gr.Textbox(placeholder="Ask me coding questions", show_label=False, container=False)
        clear = gr.Button("🔄 Clear Conversation")
    msg.submit(chat_fn, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: ("", []), None, [msg, chatbot])

if __name__ == "__main__":
    demo.launch()