Spestly committed on
Commit
dc45496
·
verified ·
1 Parent(s): e043807

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -3,23 +3,28 @@ import spaces
3
  from transformers import pipeline
4
  import torch
5
 
6
- # Initialize the pipeline with the Orion model
 
 
7
  @spaces.GPU
8
  def initialize_model():
9
- return pipeline(
10
- "text-generation",
11
- model="apexion-ai/Orion-V1-4B",
12
- torch_dtype=torch.float16,
13
- device_map="auto"
14
- )
15
-
16
- # Load the model
17
- pipe = initialize_model()
18
 
19
  @spaces.GPU
20
  def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
21
  """Generate response using the Orion model"""
22
 
 
 
 
23
  # Format the conversation history
24
  messages = []
25
 
@@ -34,13 +39,13 @@ def generate_response(message, history, max_length=512, temperature=0.7, top_p=0
34
 
35
  # Generate response
36
  try:
37
- response = pipe(
38
  messages,
39
  max_length=max_length,
40
  temperature=temperature,
41
  top_p=top_p,
42
  do_sample=True,
43
- pad_token_id=pipe.tokenizer.eos_token_id
44
  )
45
 
46
  # Extract the generated text
 
3
  from transformers import pipeline
4
  import torch
5
 
6
+ # Global variable to store the pipeline
7
+ pipe = None
8
+
9
  @spaces.GPU
10
  def initialize_model():
11
+ global pipe
12
+ if pipe is None:
13
+ pipe = pipeline(
14
+ "text-generation",
15
+ model="apexion-ai/Orion-V1-4B",
16
+ torch_dtype=torch.float16,
17
+ device_map="auto"
18
+ )
19
+ return pipe
20
 
21
  @spaces.GPU
22
  def generate_response(message, history, max_length=512, temperature=0.7, top_p=0.9):
23
  """Generate response using the Orion model"""
24
 
25
+ # Initialize model inside the GPU-decorated function
26
+ model_pipe = initialize_model()
27
+
28
  # Format the conversation history
29
  messages = []
30
 
 
39
 
40
  # Generate response
41
  try:
42
+ response = model_pipe(
43
  messages,
44
  max_length=max_length,
45
  temperature=temperature,
46
  top_p=top_p,
47
  do_sample=True,
48
+ pad_token_id=model_pipe.tokenizer.eos_token_id
49
  )
50
 
51
  # Extract the generated text