Spaces:

Spestly
/

Nous-1

Running on Zero

App Files Files Community

Spestly committed 15 days ago

Commit

d75f179

verified ·

1 Parent(s): 15171d8

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -2

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import spaces
 from transformers import pipeline
 import torch
 # Global variable to store pipelines
 model_cache = {}
@@ -120,6 +121,69 @@ def generate_response(message, history, model_name, max_length=512, temperature=
     except Exception as e:
         return f"Error generating response: {str(e)}"
 # Create the Gradio interface
 def create_interface():
     with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
@@ -222,7 +286,6 @@ def create_interface():
         ---
         ### About the Nous-1 Models
         **Nous-1-2B**: 2 billion parameter model by Apexion AI, designed for fast and quick infrencing
         **Nous-1-4B**: 4 billion parameter model by Apexion AI, optimisd for efficient conversation and text generation
@@ -239,4 +302,5 @@ def create_interface():
 # Launch the app
 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch()

 import spaces
 from transformers import pipeline
 import torch
+from typing import List, Dict, Optional
 # Global variable to store pipelines
 model_cache = {}
     except Exception as e:
         return f"Error generating response: {str(e)}"
+@spaces.GPU
+def generate(
+    model: str,
+    user_input: str,
+    history: Optional[str] = "",
+    temperature: float = 0.7,
+    system_prompt: Optional[str] = "",
+    max_tokens: int = 512
+):
+    """
+    API endpoint for LLM generation
+    Args:
+        model: Model name to use (Nous-1-2B, Nous-1-4B, or Nous-1-8B)
+        user_input: Current user message/input
+        history: JSON string of conversation history in format [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
+        temperature: Temperature for generation (0.1-2.0)
+        system_prompt: System prompt to guide the model
+        max_tokens: Maximum tokens to generate (1-8192)
+    Returns:
+        Generated response from the model
+    """
+    # Validate model
+    if model not in AVAILABLE_MODELS:
+        return f"Error: Model {model} not available. Available models: {list(AVAILABLE_MODELS.keys())}"
+    # Initialize model
+    try:
+        model_pipe = initialize_model(model)
+    except Exception as e:
+        return f"Error loading model {model}: {str(e)}"
+    # Parse history if provided and convert to gradio format
+    gradio_history = []
+    if history and history.strip():
+        try:
+            import json
+            history_list = json.loads(history)
+            current_pair = [None, None]
+            for msg in history_list:
+                if isinstance(msg, dict) and "role" in msg and "content" in msg:
+                    if msg["role"] == "user":
+                        if current_pair[0] is not None:
+                            gradio_history.append([current_pair[0], current_pair[1]])
+                        current_pair = [msg["content"], None]
+                    elif msg["role"] == "assistant":
+                        current_pair[1] = msg["content"]
+            if current_pair[0] is not None:
+                gradio_history.append([current_pair[0], current_pair[1]])
+        except:
+            # If history parsing fails, continue without history
+            pass
+    # Add system prompt to user input if provided
+    final_user_input = user_input
+    if system_prompt and system_prompt.strip():
+        final_user_input = f"System: {system_prompt}\n\nUser: {user_input}"
+    # Use the original generate_response function
+    return generate_response(final_user_input, gradio_history, model, max_tokens, temperature, 0.9)
 # Create the Gradio interface
 def create_interface():
     with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
         ---
         ### About the Nous-1 Models
         **Nous-1-2B**: 2 billion parameter model by Apexion AI, designed for fast and quick infrencing
         **Nous-1-4B**: 4 billion parameter model by Apexion AI, optimisd for efficient conversation and text generation
 # Launch the app
 if __name__ == "__main__":
     demo = create_interface()
+    # Launch the UI. NOTE(review): share=True requests a public share link, not an API toggle - confirm it is needed
+    demo.launch(share=True)