from transformers import pipeline
import torch
import os
from dotenv import load_dotenv

load_dotenv()
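
# Example .env consumed by load_dotenv() above; HF_MODEL_ID is the only
# variable this module reads, and the value shown is hypothetical:
#
#   HF_MODEL_ID=your-org/your-text-generation-model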

class LLMPipeline:
    def __init__(self):
        # The model can be overridden via the HF_MODEL_ID environment variable.
        # Note: the default below points at a GGUF quantization repo, which the
        # standard transformers text-generation pipeline cannot load from the
        # repo id alone; if loading fails, point HF_MODEL_ID at a regular
        # transformers checkpoint instead.
        model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
        try:
            # Use CUDA with half precision if available, otherwise fall back to CPU.
            if torch.cuda.is_available():
                device = "cuda"
                dtype = torch.float16
            else:
                device = "cpu"
                dtype = torch.float32
            self.pipeline = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=dtype,
                device_map="auto" if device == "cuda" else None,
                model_kwargs={"low_cpu_mem_usage": True},
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    async def generate(self, prompt: str, max_length: int = 100) -> str:
        """Generate text using the local Gemma model.

        The underlying pipeline call is synchronous and will block the event
        loop; run it in an executor if this is used inside an async server.
        """
        try:
            result = self.pipeline(
                prompt,
                max_length=max_length,
                num_return_sequences=1,
                do_sample=True,  # required for temperature/top_p to have any effect
                temperature=0.7,
                top_p=0.9,
            )
            return result[0]['generated_text']
        except Exception as e:
            print(f"Error in LLM generation: {e}")
            return ""