from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the single GGUF shard by its repo path:
model_path = hf_hub_download(
    repo_id="Inventors-Hub/SwarmChat-models",
    repo_type="model",
    filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
)

llm = Llama(
    model_path=model_path,
    n_ctx=512,        # small context window to keep memory use down
    low_vram=True,    # llama.cpp low-VRAM mode (only honored by older llama-cpp-python releases)
    f16_kv=True,      # half-precision KV cache (likewise version-dependent)
    use_mmap=True,    # memory-map the model file instead of reading it fully into RAM
    use_mlock=False,  # do not pin the mapped pages in memory
)

# Translate text to English with EuroLLM, using its ChatML prompt format.
def translate_text(text):
    input_prompt = f"""<|im_start|>system
<|im_end|>
<|im_start|>user
Translate the following text to English:
Text: {text}
English:
<|im_end|>
<|im_start|>assistant
"""
    # max_tokens must leave room for the prompt inside n_ctx (512 here);
    # stopping at <|im_end|> keeps the model from generating past its turn.
    output = llm(input_prompt, max_tokens=256, temperature=0, stop=["<|im_end|>"])
    translated_text = output.get("choices", [{}])[0].get("text", "").strip()
    return translated_text
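
# Example usage: a minimal sketch of calling translate_text directly. The
# __main__ guard and the sample sentence are illustrative additions, not
# part of the original script.
if __name__ == "__main__":
    # EuroLLM is multilingual, so the input can be in any supported language.
    sample = "Guten Morgen! Wie geht es dir heute?"
    print(translate_text(sample))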