from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from llama_cpp import Llama
import json
import time
import os

app = FastAPI()

# Load the quantized Llama 2 chat model once at startup.
llm = Llama(
    model_path="llama-2-7b-chat.Q3_K_S.gguf",
    n_ctx=2048,
    n_batch=512,
    use_mlock=True,
    n_threads=8,
)
def typewrite(text, delay=0.01):
    """Print text to the server console with a typewriter effect."""
    for char in text:
        print(char, end='', flush=True)
        time.sleep(delay)
    print(flush=True)  # Print a newline to move to the next line
@app.post("/chat")
async def chat(request: Request):
    try:
        data = await request.json()
        user_input = data.get("user_input")
        if not user_input:
            raise HTTPException(status_code=400, detail="Missing 'user_input' field in the request JSON.")

        # Clear the server console ("cls" only works on Windows).
        os.system("cls" if os.name == "nt" else "clear")
        print("Chatbot by Aritra Roy & DVLH")

        prompt = (
            "Llama-2-Chat [INST] <<SYS>>You're an assistant named Tusti. "
            "You are developed by Aritra Roy. Don't share any false "
            f"information.<</SYS>> {user_input} [/INST]"
        )
        output_stream = llm(
            prompt,
            max_tokens=1024,
            echo=False,
            temperature=0.2,
            top_p=0.1,
            stream=True,
        )

        # A plain (sync) generator: StreamingResponse iterates it in a
        # threadpool, so the blocking llama.cpp calls don't stall the event loop.
        def generate_chunks():
            response_text = ''
            for chunk in output_stream:
                if chunk.get('choices') and chunk['choices'][0].get('text'):
                    piece = chunk['choices'][0]['text']
                    response_text += piece
                    typewrite(piece, delay=0.0)  # echo only the new text, not the whole response
                    # Stream each partial response as one JSON object per line.
                    yield json.dumps({"response": response_text}) + "\n"

        # Leftover CLI behavior: typing 'clear' also clears the server console.
        if user_input == 'clear':
            os.system("cls" if os.name == "nt" else "clear")

        return StreamingResponse(generate_chunks(), media_type="application/x-ndjson")
    except HTTPException:
        raise  # don't convert the 400 above into a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
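
# Usage sketch (not part of the original file): a minimal client for the
# /chat endpoint, assuming the server is reachable at localhost:7860 and
# the `requests` package is installed. Each streamed line is a JSON object
# carrying the accumulated response so far, so the last line holds the
# full reply.
#
#   import json
#   import requests
#
#   with requests.post(
#       "http://localhost:7860/chat",
#       json={"user_input": "Hello, Tusti!"},
#       stream=True,
#   ) as resp:
#       reply = ""
#       for line in resp.iter_lines():
#           if line:
#               reply = json.loads(line)["response"]
#       print(reply)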