from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

app = FastAPI()
REPO_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF" |
|
FILENAME = "mistral-7b-instruct-v0.1.Q4_K_M.gguf" |
|
MODEL_DIR = "models" |
|
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME) |
|
|
|
|
|

# Download the weights on first start; later runs reuse the local copy.
if not os.path.exists(MODEL_PATH):
    print(f"Downloading model {FILENAME} from Hugging Face...")
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        cache_dir=MODEL_DIR,
        local_dir=MODEL_DIR,
        local_dir_use_symlinks=False,  # deprecated and ignored by recent huggingface_hub releases
    )
else:
    print(f"Model already exists at: {MODEL_PATH}")
    model_path = MODEL_PATH

# Load the model once at startup so every request reuses the same instance.
llm = Llama(
    model_path=model_path,
    n_ctx=1024,    # context window size in tokens
    n_threads=4,   # CPU threads used for inference
    n_batch=64,    # prompt tokens processed per batch
)


@app.get("/")
def root():
    return {"message": "Mistral API is live!"}
@app.post("/generate") |
|
async def generate(request: Request): |
|
data = await request.json() |
|
prompt = data.get("prompt", "") |
|
|
|
print("🧾 Received prompt:", prompt) |
|
|
|
response = llm.create_chat_completion( |
|
messages=[ |
|
{"role": "system", "content": "You are a helpful assistant."}, |
|
{"role": "user", "content": prompt} |
|
], |
|
max_tokens=1024, |
|
temperature=0.7, |
|
) |
|
|
|
print("📤 Raw model response:", response) |
|
|
|
llm.reset() |
|
|
|
return { |
|
"response": response["choices"][0]["message"]["content"].strip() |
|
} |
|
|
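
# Usage sketch (assumes this file is saved as main.py and served on the default
# port 8000; the module name, port, and prompt are illustrative, not part of the
# app above):
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
#   import requests
#   r = requests.post(
#       "http://localhost:8000/generate",
#       json={"prompt": "Explain GGUF quantization in one sentence."},
#   )
#   print(r.json()["response"])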