Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
from transformers import pipeline, TextIteratorStreamer
|
| 3 |
from threading import Thread
|
| 4 |
import torch
|
|
|
|
| 5 |
import subprocess
|
| 6 |
import spaces
|
| 7 |
import os
|
|
@@ -60,7 +61,7 @@ Below this is the role you are to play.
|
|
| 60 |
# Install flash-attn
|
| 61 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 62 |
# Initialize the model pipeline
|
| 63 |
-
generator = pipeline('text-generation', model='Locutusque/Open-Thespis-Llama-3B', torch_dtype=torch.bfloat16)
|
| 64 |
@spaces.GPU
|
| 65 |
def generate_text(prompt, system_prompt, temperature, top_p, top_k, repetition_penalty, max_length):
|
| 66 |
"""
|
|
|
|
| 2 |
from transformers import pipeline, TextIteratorStreamer
|
| 3 |
from threading import Thread
|
| 4 |
import torch
|
| 5 |
+
import os
|
| 6 |
import subprocess
|
| 7 |
import spaces
|
| 8 |
import os
|
|
|
|
| 61 |
# Install flash-attn
|
| 62 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 63 |
# Initialize the model pipeline
|
| 64 |
+
generator = pipeline('text-generation', model='Locutusque/Open-Thespis-Llama-3B', torch_dtype=torch.bfloat16, token=os.getenv("TOKEN"))
|
| 65 |
@spaces.GPU
|
| 66 |
def generate_text(prompt, system_prompt, temperature, top_p, top_k, repetition_penalty, max_length):
|
| 67 |
"""
|