Update app.py
app.py CHANGED
```diff
@@ -3,27 +3,55 @@ import spaces
 from transformers import pipeline
 import torch
 
-# Global variable to store
-
+# Global variable to store pipelines
+model_cache = {}
+
+# Available models
+AVAILABLE_MODELS = {
+    "Nous-V1-4B": "apexion-ai/Nous-V1-4B",
+    "Nous-V1-8B": "apexion-ai/Nous-V1-8B",
+}
 
 @spaces.GPU
-def initialize_model():
-    global
-
-
-
-
-
-
-
-
+def initialize_model(model_name):
+    global model_cache
+
+    if model_name not in AVAILABLE_MODELS:
+        raise ValueError(f"Model {model_name} not found in available models")
+
+    model_id = AVAILABLE_MODELS[model_name]
+
+    # Check if model is already cached
+    if model_id not in model_cache:
+        try:
+            model_cache[model_id] = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True
+            )
+        except Exception as e:
+            # Fallback to CPU if GPU fails
+            model_cache[model_id] = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=torch.float32,
+                device_map="cpu",
+                trust_remote_code=True
+            )
+
+    return model_cache[model_id]
 
 @spaces.GPU
-def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
-    """Generate response using the
+def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
+    """Generate response using the selected model"""
 
     # Initialize model inside the GPU-decorated function
-
+    try:
+        model_pipe = initialize_model(model_name)
+    except Exception as e:
+        return f"Error loading model {model_name}: {str(e)}"
 
     # Format the conversation history
     messages = []
```
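Two things in this hunk are worth calling out. Under ZeroGPU, the GPU is attached only while a `@spaces.GPU`-decorated function runs, which is why the pipeline is built lazily inside `initialize_model` rather than at import time, and why instances are memoized in `model_cache` so later calls skip the reload. A minimal sketch of that keyed-cache pattern, with a hypothetical `fake_loader` standing in for `transformers.pipeline` so it runs without a GPU or model download:

```python
# Sketch of the lazy, keyed model cache used above. `fake_loader` is a
# hypothetical stand-in for transformers.pipeline, so this runs anywhere.
from typing import Callable, Dict

model_cache: Dict[str, object] = {}

def get_or_load(model_id: str, loader: Callable[[str], object]) -> object:
    """Build the pipeline on first use, then reuse the cached instance."""
    if model_id not in model_cache:
        model_cache[model_id] = loader(model_id)
    return model_cache[model_id]

if __name__ == "__main__":
    loads = []
    def fake_loader(mid: str) -> str:
        loads.append(mid)                  # record each real load
        return f"<pipeline for {mid}>"

    get_or_load("apexion-ai/Nous-V1-4B", fake_loader)
    get_or_load("apexion-ai/Nous-V1-4B", fake_loader)  # cache hit: no reload
    assert loads == ["apexion-ai/Nous-V1-4B"]
```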
```diff
@@ -39,24 +67,52 @@ def generate_response(message, history, max_length=512, temperature=0.7, top_p=0
 
     # Generate response
     try:
-
-
-
-
-
-
-
-
+        # Some models may not support the messages format, so we'll try different approaches
+        try:
+            # Try with messages format first
+            response = model_pipe(
+                messages,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=model_pipe.tokenizer.eos_token_id,
+                return_full_text=False
+            )
+        except:
+            # Fallback to simple text format
+            conversation_text = ""
+            for msg in messages:
+                if msg["role"] == "user":
+                    conversation_text += f"User: {msg['content']}\n"
+                else:
+                    conversation_text += f"Assistant: {msg['content']}\n"
+            conversation_text += "Assistant:"
+
+            response = model_pipe(
+                conversation_text,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=model_pipe.tokenizer.eos_token_id,
+                return_full_text=False
+            )
 
         # Extract the generated text
-
+        if isinstance(response, list) and len(response) > 0:
+            generated_text = response[0]['generated_text']
+        else:
+            generated_text = str(response)
 
-        #
+        # Clean up the response
         if isinstance(generated_text, list):
             assistant_response = generated_text[-1]['content']
         else:
-            #
-            assistant_response = str(generated_text).
+            # Remove the prompt and extract assistant response
+            assistant_response = str(generated_text).strip()
+            if "Assistant:" in assistant_response:
+                assistant_response = assistant_response.split("Assistant:")[-1].strip()
 
         return assistant_response
 
```
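One caveat on the generation kwargs here: in transformers, `max_length` caps prompt plus completion together, so a long chat history can leave little or no room for new tokens; `max_new_tokens` bounds only the completion and is usually the safer knob. The plain-text fallback prompt builder is also easy to pull out and exercise on its own; a standalone sketch of the same logic:

```python
# Standalone sketch of the plain-text fallback used in generate_response:
# flatten chat messages into a "User:/Assistant:" transcript and end with
# an "Assistant:" cue so the model continues as the assistant.
def build_prompt(messages: list) -> str:
    conversation_text = ""
    for msg in messages:
        if msg["role"] == "user":
            conversation_text += f"User: {msg['content']}\n"
        else:
            conversation_text += f"Assistant: {msg['content']}\n"
    return conversation_text + "Assistant:"

print(build_prompt([
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
    {"role": "user", "content": "Summarize ZeroGPU in one line."},
]))
```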
```diff
@@ -65,18 +121,28 @@ def generate_response(message, history, max_length=512, temperature=0.7, top_p=0
 
 # Create the Gradio interface
 def create_interface():
-    with gr.Blocks(title="
+    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🚀 Nous-V1
+        # 🚀 Nous-V1 Model Chat Interface
 
-        Chat with the Nous-V1
+        Chat with the Nous-V1 models by Apexion AI. Choose between the 4B and 8B parameter versions.
 
-        **
+        **Available Models:**
+        - Nous-V1-4B (4 billion parameters)
+        - Nous-V1-8B (8 billion parameters)
         """)
 
+        with gr.Row():
+            model_selector = gr.Dropdown(
+                choices=list(AVAILABLE_MODELS.keys()),
+                value="Nous-V1-4B",
+                label="Select Model",
+                info="Choose which model to use for generation"
+            )
+
         chatbot = gr.Chatbot(
             height=400,
-            placeholder="
+            placeholder="Select a model and start chatting...",
             label="Chat"
         )
 
```
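The dropdown added here follows the standard Gradio pattern: a component's current value is passed to any handler that lists it as an input. A minimal self-contained sketch of that wiring (assumes only that `gradio` is installed):

```python
# Minimal sketch of the dropdown -> handler wiring used above.
import gradio as gr

CHOICES = ["Nous-V1-4B", "Nous-V1-8B"]

def describe(choice: str) -> str:
    return f"Selected model: {choice}"

with gr.Blocks() as demo:
    selector = gr.Dropdown(choices=CHOICES, value=CHOICES[0], label="Select Model")
    out = gr.Textbox(label="Current selection")
    selector.change(describe, inputs=selector, outputs=out)

if __name__ == "__main__":
    demo.launch()
```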
```diff
@@ -96,33 +162,37 @@ def create_interface():
                 maximum=8192,
                 value=2048,
                 step=50,
-                label="Max Length"
+                label="Max Length",
+                info="Maximum length of generated response"
             )
             temperature = gr.Slider(
                 minimum=0.1,
                 maximum=2.0,
                 value=0.7,
                 step=0.1,
-                label="Temperature"
+                label="Temperature",
+                info="Controls randomness in generation"
             )
             top_p = gr.Slider(
                 minimum=0.1,
                 maximum=1.0,
                 value=0.9,
                 step=0.1,
-                label="Top P"
+                label="Top P",
+                info="Controls diversity via nucleus sampling"
             )
 
         # Event handlers
         def user_message(message, history):
             return "", history + [[message, None]]
 
-        def bot_response(history, max_len, temp, top_p):
+        def bot_response(history, model_name, max_len, temp, top_p):
             if history:
                 user_message = history[-1][0]
                 bot_message = generate_response(
                     user_message,
                     history[:-1],
+                    model_name,
                     max_len,
                     temp,
                     top_p
```
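Because `user_message` and `bot_response` are plain functions over the `[[user, bot], ...]` history structure, they can be unit-tested without launching the UI. A small sketch, with a hypothetical `fake_generate` standing in for `generate_response` so no model is needed:

```python
# Unit-test sketch for the two chat handlers above; `fake_generate` is a
# hypothetical stand-in for generate_response.
def user_message(message, history):
    return "", history + [[message, None]]

def bot_response(history, fake_generate=lambda m, h: f"echo: {m}"):
    if history:
        user_msg = history[-1][0]
        history[-1][1] = fake_generate(user_msg, history[:-1])
    return history

cleared, hist = user_message("Hello", [])
assert cleared == "" and hist == [["Hello", None]]
assert bot_response(hist) == [["Hello", "echo: Hello"]]
print("handler sketch OK")
```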
```diff
@@ -130,31 +200,34 @@ def create_interface():
                 history[-1][1] = bot_message
             return history
 
+        def model_changed(model_name):
+            return gr.update(placeholder=f"Chat with {model_name}...")
+
         # Wire up the events
         msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
         )
 
         submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_selector, max_length, temperature, top_p], chatbot
        )
 
         clear_btn.click(lambda: None, None, chatbot, queue=False)
 
+        model_selector.change(model_changed, model_selector, chatbot)
+
         gr.Markdown("""
         ---
 
-        ### About Nous-V1
-
-        Nous-V1-
-        It's designed for efficient text generation and conversation.
-
-
-        - 4B parameters for efficient inference
-        - Optimizsd for conversational AI
-        - Supports various text generation tasks
-
-        This Space uses ZeroGPU for efficient GPU allocation.
+        ### About the Nous-V1 Models
+
+        **Nous-V1-4B**: 4 billion parameter model by Apexion AI, optimized for efficient conversation and text generation
+
+        **Nous-V1-8B**: 8 billion parameter model by Apexion AI, offering enhanced capabilities and better performance for complex tasks
+
+        Both models are designed for conversational AI and support various text generation tasks. The 8B model provides more sophisticated responses but requires more computational resources.
+
+        This Space uses ZeroGPU for efficient GPU allocation across both model sizes.
         """)
 
         return demo
```
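The diff ends inside `create_interface()`, so the file's entry point is not shown; for a Space like this it would typically be something along these lines (an assumption, not part of the commit):

```python
# Typical entry point for a Gradio Space (assumed; not shown in this diff).
if __name__ == "__main__":
    demo = create_interface()
    demo.queue().launch()   # queue() is common on ZeroGPU Spaces
```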