Spaces:

gnumanth
/

llama3-chat

Running

App Files Files Community

gnumanth committed on Apr 18, 2024

Commit

78011f3

verified ·

1 Parent(s): dc16a81

init0

Browse files

Files changed (1) hide show

app.py +61 -0

app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import functools
+
+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Model id handed to ``from_pretrained`` for both the model and the tokenizer.
+model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
+# Device requested when the model is loaded.
+device = 'cuda'
+# Weight dtype requested when the model is loaded.
+torch_dtype = torch.bfloat16
+@gr.funcs
+def load_model() -> AutoModelForCausalLM:
+    return AutoModelForCausalLM.from_pretrained(model_name, device=device, torch_dtype=torch_dtype)
+@gr.funcs
+def load_tokenizer() -> AutoTokenizer:
+    return AutoTokenizer.from_pretrained(model_name)
+@gr.funcs
+def preprocess_messages(message: str, history: list, system_prompt: str) -> dict:
+    messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': message}]
+    prompt = load_tokenizer().apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    return prompt
+@gr.funcs
+def generate_text(prompt: str, max_new_tokens: int, temperature: float) -> str:
+    model = load_model()
+    terminators = [load_tokenizer().eos_token_id, load_tokenizer().convert_tokens_to_ids(['\n'])]
+    temp = temperature + 0.1
+    outputs = model.generate(
+        prompt,
+        max_new_tokens=max_new_tokens,
+        eos_token_id=terminators[0],
+        do_sample=True,
+        temperature=temp,
+        top_p=0.9
+     )
+    return load_tokenizer().decode(outputs[0], skip_special_tokens=True)
+@gr.funcs
+def chat_function(
+    message: str,
+    history: list,
+    system_prompt: str,
+    max_new_tokens: int,
+    temperature: float
+) -> str:
+    prompt = preprocess_messages(message, history, system_prompt)
+    return generate_text(prompt, max_new_tokens, temperature)
+gr.ChatInterface(
+    chat_function,
+    chatbot=gr.Chatbot(height=400),
+    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
+    title="LLAMA3 Chat",
+    description="""Chat with llama3""",
+    theme="soft",
+    additional_inputs=[
+        gr.Textbox("You shall answer to all the questions as very smart AI", label="System Prompt"),
+        gr.Slider(512, 4096, label="Max New Tokens"),
+        gr.Slider(0, 1, label="Temperature")
+     ]
+).launch()