"""Minimal Gradio chat UI around a local MasherAI-7B GGUF model via llama.cpp."""

import os  # NOTE(review): unused here — presumably for env/path config; confirm before removing

from huggingface_hub import hf_hub_download  # NOTE(review): unused — likely intended to fetch the GGUF; confirm
import gradio as gr
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

# Path to the quantized GGUF weights, expected next to this script.
model_path = "MasherAI-7B-v3-GGUF-unsloth.Q4_K_M.gguf"


def llm_load() -> LlamaCpp:
    """Build a LlamaCpp LLM that streams tokens to stdout as they are produced.

    Returns:
        A configured ``LlamaCpp`` instance ready for inference.
    """
    callback = CallbackManager([StreamingStdOutCallbackHandler()])
    model = LlamaCpp(
        model_path=model_path,
        temperature=0.7,
        max_tokens=2000,
        top_p=1,
        callback_manager=callback,
        verbose=True,
    )
    return model


# Loaded once at import time so every request reuses the same model instance.
llm = llm_load()


def generate_response(user_input):
    """Wrap the user's text in the ChatML prompt template and return the reply.

    Args:
        user_input: Raw text typed into the Gradio textbox.

    Returns:
        The model's completion as a string.
    """
    model_prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
    # ``invoke`` replaces the deprecated ``llm(prompt)`` __call__ API; stop on
    # the ChatML end-of-turn marker so generation does not run into a new turn.
    response = llm.invoke(model_prompt, stop=["<|im_end|>"])
    return response


iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="MasherAI-7B Model",
    description="A simple interface for interacting with the MasherAI-7B model.",
)

if __name__ == "__main__":
    iface.launch()