ameliakris's picture
Update Dockerfile and improve error handling
613c8f7
import asyncio
import os
import threading

import torch
from dotenv import load_dotenv
from transformers import pipeline
# Load variables from a local .env file (if one exists) into the process
# environment at import time, so the os.getenv("HF_MODEL_ID", ...) lookup in
# LLMPipeline.__init__ can be configured without exporting variables by hand.
load_dotenv()
class LLMPipeline:
    """Wrapper around a Hugging Face ``text-generation`` pipeline.

    The model id comes from the ``HF_MODEL_ID`` environment variable, with a
    built-in fallback. The pipeline is built once in ``__init__`` and reused
    by every :meth:`generate` call.
    """

    # Shared by all instances. Generation runs on worker threads so the event
    # loop stays responsive; this lock keeps pipeline calls strictly
    # one-at-a-time, which is the ordering callers had when the call blocked
    # the loop directly.
    _inference_lock = threading.Lock()

    def __init__(self):
        """Load the model and build the text-generation pipeline.

        Uses float16 with ``device_map="auto"`` when
        ``torch.cuda.is_available()`` is true, otherwise float32 with no
        device map.

        Raises:
            Exception: Any error raised while probing CUDA or constructing
                the pipeline. The error is printed and then re-raised
                unchanged.
        """
        # NOTE(review): the fallback id names a GGUF repository; confirm that
        # `pipeline(...)` can load it without an explicit `gguf_file` argument.
        model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
        try:
            use_cuda = torch.cuda.is_available()
            self.pipeline = pipeline(
                "text-generation",
                model=model_id,
                # Half precision on GPU, full precision on CPU.
                torch_dtype=torch.float16 if use_cuda else torch.float32,
                device_map="auto" if use_cuda else None,
                model_kwargs={"low_cpu_mem_usage": True},
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    def _generate_sync(self, prompt: str, max_length: int) -> str:
        """Run one blocking pipeline call and return its generated text."""
        with self._inference_lock:
            result = self.pipeline(
                prompt,
                max_length=max_length,
                num_return_sequences=1,
                # Without do_sample=True the temperature/top_p settings below
                # are not applied unless the model's own generation config
                # happens to enable sampling.
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
            )
        return result[0]['generated_text']

    async def generate(self, prompt: str, max_length: int = 100) -> str:
        """Generate text using the local Gemma model.

        The blocking pipeline call is executed on the event loop's default
        executor so other coroutines keep running while the model works.

        Args:
            prompt: Text passed to the pipeline as its input.
            max_length: Forwarded as the pipeline's ``max_length`` argument.
                NOTE(review): this presumably counts prompt tokens as well as
                generated ones -- confirm against the transformers docs.

        Returns:
            The ``generated_text`` field of the first returned sequence, or
            an empty string if generation raised (the error is printed).
        """
        loop = asyncio.get_running_loop()
        try:
            return await loop.run_in_executor(
                None, self._generate_sync, prompt, max_length
            )
        except Exception as e:
            # Best-effort contract: report the failure and hand back "".
            print(f"Error in LLM generation: {e}")
            return ""