TEST

Sleeping

App Files Files Community

Reality123b committed on Nov 18, 2024

Commit

f69c6af

verified ·

1 Parent(s): 691f69e

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -117

app.py CHANGED Viewed

@@ -1,131 +1,74 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-import time
-# Model configuration
-MODEL_NAME = "Qwen/Qwen2-14B-Instruct"
-# Initialize model and tokenizer
-print("Loading model and tokenizer...")
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
     torch_dtype="auto",
-    device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-print("Model and tokenizer loaded!")
-def simulate_typing(text, min_chars_per_sec=15, max_chars_per_sec=40):
-    """Simulate typing animation with variable speed."""
-    full_text = ""
-    words = text.split()
-    for i, word in enumerate(words):
-        full_text += word
-        if i < len(words) - 1:
-            full_text += " "
-        delay = 1 / (min_chars_per_sec + (max_chars_per_sec - min_chars_per_sec) * torch.rand(1).item())
-        time.sleep(delay)
-        yield full_text
-def generate_response(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens=512,
-    temperature=0.7,
-    top_p=0.95
-):
-    # Prepare conversation history
-    messages = [{"role": "system", "content": system_message}]
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
-    # Convert messages to model input format
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    # Generate response
-    with torch.inference_mode():
-        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-        generated_ids = model.generate(
-            **model_inputs,
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
-        )
-        generated_ids = generated_ids[0, len(model_inputs.input_ids[0]):]
-        response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-    # Return response with typing animation
-    for partial_response in simulate_typing(response):
-        yield partial_response
-# Custom CSS with typing cursor animation
-custom_css = """
-@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap');
-body, .gradio-container {
-    font-family: 'Inter', sans-serif;
-}
-.typing-cursor::after {
-    content: '|';
-    animation: blink 1s step-start infinite;
-}
-@keyframes blink {
-    50% { opacity: 0; }
-}
-"""
-# System message
-system_message = """You are Qwen 2.5 14B, an advanced AI assistant created by Alibaba Cloud.
-You are knowledgeable, helpful, and strive to provide accurate and comprehensive responses."""
-# Gradio chat interface
-demo = gr.ChatInterface(
-    generate_response,
-    additional_inputs=[
-        gr.Textbox(
-            value=system_message,
-            visible=False,
-        ),
-        gr.Slider(
-            minimum=1,
-            maximum=2048,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=2.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)"
-        ),
-    ],
-    css=custom_css,
-    title="Qwen 2.5 14B Chat",
-    description="An advanced AI assistant powered by Qwen 2.5 14B"
-)
-# Launch the demo
-if __name__ == "__main__":
-    demo.queue(max_size=40)
-    demo.launch(max_threads=40)

 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# Set seed for reproducibility
+torch.random.manual_seed(0)
+# Single checkpoint id shared by the model and tokenizer loaders so they cannot drift apart
+MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
+# Use the GPU when one is present; fall back to CPU so startup does not crash on GPU-less hosts
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map=DEVICE,
     torch_dtype="auto",
+    # NOTE(review): trust_remote_code runs Python code shipped in the model repo;
+    # confirm it is still required for this checkpoint and consider pinning a revision.
+    trust_remote_code=True,
 )
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# Define the pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+# System message (invisible to the user)
+SYSTEM_MESSAGE = {"role": "system", "content": "You are a helpful AI assistant."}
+# Function to process the user input and generate output
+def chatbot_response(conversation_history):
+    """Generate the assistant reply for the newest turn and store it in the history.
+    Each entry of ``conversation_history`` is a dict with a ``"user_input"`` key and,
+    once answered, an ``"assistant_reply"`` key. The list is updated in place and
+    also returned. An empty history is returned unchanged.
+    """
+    # Nothing to answer yet; indexing [-1] below would raise IndexError
+    if not conversation_history:
+        return conversation_history
+    # Build message sequence, replaying earlier assistant replies so the model
+    # sees both sides of the conversation instead of only the user turns
+    messages = [SYSTEM_MESSAGE]
+    for turn in conversation_history[:-1]:
+        messages.append({"role": "user", "content": turn["user_input"]})
+        if "assistant_reply" in turn:
+            messages.append({"role": "assistant", "content": turn["assistant_reply"]})
+    # The newest turn is the one being answered, so only its user side is sent
+    messages.append({"role": "user", "content": conversation_history[-1]["user_input"]})
+    # Pass messages to the model
+    generation_args = {
+        "max_new_tokens": 500,
+        "return_full_text": False,
+        "temperature": 0.0,
+        "do_sample": False,
+    }
+    output = pipe(messages, **generation_args)
+    assistant_reply = output[0]["generated_text"]
+    # Append assistant's response to history
+    conversation_history[-1]["assistant_reply"] = assistant_reply
+    return conversation_history
+# Define Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# AI Chatbot with System Message")
+    with gr.Row():
+        with gr.Column():
+            chatbox = gr.Chatbot()
+            input_box = gr.Textbox(label="Your Message")
+            submit_btn = gr.Button("Submit")
+    conversation_state = gr.State([])  # Maintain conversation history
+    def render_chat(history):
+        """Convert the stored history dicts into [user, assistant] pairs for the Chatbot."""
+        # A turn without a reply yet is rendered with None on the assistant side
+        return [[turn["user_input"], turn.get("assistant_reply")] for turn in history]
+    def update_conversation(user_input, history):
+        """Handle a submit: answer non-blank input, refresh the chat view, clear the textbox."""
+        if user_input.strip():
+            history.append({"user_input": user_input})
+            history = chatbot_response(history)
+        # Blank input leaves the history untouched but still clears the textbox
+        return history, render_chat(history), ""
+    # chatbox is listed as an output so each reply is actually displayed; the
+    # build-time chatbox.update(...) call was dropped because it passed the
+    # gr.State component itself (not a list) to chatbot_response
+    submit_btn.click(
+        update_conversation,
+        inputs=[input_box, conversation_state],
+        outputs=[conversation_state, chatbox, input_box],
+    )
+# Launch the interface
+demo.launch()