Spaces:

Athspi
/

Tttt

Sleeping

App Files Files Community

Athspi committed on Mar 18

Commit

c21b225

verified ·

1 Parent(s): 8129ab8

Create app.py

Browse files

Files changed (1) hide show

app.py +65 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load model and tokenizer
+model_id = "suayptalha/FastLlama-3.2-3B-Instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
+# System prompt
+system_prompt = "You are a friendly assistant named FastLlama."
+def format_prompt(message: str, history: list):
+    prompt = f"<|system|>\n{system_prompt}</s>\n"
+    for user_msg, bot_msg in history:
+        prompt += f"<|user|>\n{user_msg}</s>\n<|assistant|>\n{bot_msg}</s>\n"
+    prompt += f"<|user|>\n{message}</s>\n<|assistant|>\n"
+    return prompt
+def respond(message: str, history: list):
+    # Format the prompt with chat history
+    full_prompt = format_prompt(message, history)
+    # Tokenize input
+    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+    # Generate response
+    output = model.generate(
+        inputs.input_ids,
+        max_new_tokens=256,
+        temperature=0.7,
+        top_p=0.9,
+        repetition_penalty=1.1,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id
+    )
+    # Decode response
+    response = tokenizer.decode(
+        output[0][inputs.input_ids.shape[-1]:],
+        skip_special_tokens=True
+    )
+    return response
+# Create chat interface
+chat = gr.ChatInterface(
+    fn=respond,
+    title="FastLlama-3.2B Chat",
+    description="Chat with FastLlama-3.2-3B-Instruct AI assistant",
+    examples=[
+        ["Explain quantum computing in simple terms"],
+        ["Write a poem about artificial intelligence"],
+        ["What's the meaning of life?"]
+    ],
+    cache_examples=False
+)
+# Launch the app
+if __name__ == "__main__":
+    chat.launch(server_name="0.0.0.0")