Spaces:

lpetrl
/

demo-llm

Sleeping

demo-llm / main.py

Petro

First model version

48afe02 over 1 year ago

736 Bytes

	from langchain.llms import CTransformers
	from fastapi import FastAPI
	from pydantic import BaseModel

	file_name = "zephyr-7b-beta.Q4_K_S.gguf"
	config = {
	"max_new_tokens": 1024,
	"model_type": "mistral",
	# "stream": True,
	}
	llm = CTransformers(
	model=file_name,
	**config
	)


	class validation(BaseModel):
	prompt: str
	#Fast API


	app = FastAPI()

	@app.post("/llm_on_cpu")
	async def stream(item: validation):
	system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
	E_INST = "</s>"
	user, assistant = "<\|user\|>", "<\|assistant\|>"
	prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"

	return llm(prompt)