File size: 1,897 Bytes
8c77d9c
9449955
966212c
83c6483
e318921
9449955
 
 
 
966212c
 
 
 
 
9449955
966212c
8c77d9c
966212c
8c77d9c
 
 
 
 
 
966212c
 
9449955
966212c
69a8774
966212c
 
 
 
 
 
290f8e2
 
966212c
290f8e2
 
 
 
 
 
966212c
290f8e2
966212c
 
 
9449955
290f8e2
9449955
966212c
 
9449955
 
 
8c77d9c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from fastapi import FastAPI, HTTPException, Request
from llama_cpp import Llama
import time
import os

app = FastAPI()

# Load the quantized Llama-2 7B chat model (GGUF) at import time.
# n_ctx=2048: context window size; n_batch=512: prompt eval batch size;
# use_mlock=True: lock model memory to avoid swapping; n_threads=8: CPU threads.
# NOTE(review): model path is hard-coded and resolved relative to the working
# directory — importing this module fails if the .gguf file is absent; confirm
# deployment layout.
llm = Llama(model_path="llama-2-7b-chat.Q3_K_S.gguf", n_ctx=2048, n_batch=512, use_mlock=True, n_threads=8)

def typewrite(text, delay=0.01):
    """Print *text* to stdout one character at a time, typewriter-style.

    Args:
        text: String to print.
        delay: Seconds to sleep after each character (default 0.01).

    Note: no trailing newline is emitted; output continues on the same line.
    """
    for char in text:
        print(char, end='', flush=True)
        time.sleep(delay)
    # Bug fix: the original ended with print(end='', flush=True) annotated
    # "Print newline to move to the next line" — it printed nothing at all
    # (and every character is already flushed above), so the dead call and
    # its misleading comment are removed. Runtime output is unchanged.

@app.post("/chat")
async def chat(request: Request):
    """Run the user's message through the local Llama model.

    Expects a JSON body of the form {"user_input": "<message>"} and returns
    an async generator that yields {"response": <cumulative text>} dicts as
    tokens stream from the model.

    Raises:
        HTTPException 400: 'user_input' missing from the request JSON.
        HTTPException 500: any model / runtime failure.
    """
    try:
        data = await request.json()
        user_input = data.get("user_input")

        if not user_input:
            raise HTTPException(status_code=400, detail="Missing 'user_input' field in the request JSON.")

        # NOTE(review): "cls" is Windows-only; this is a no-op error on POSIX
        # hosts — confirm the target platform.
        os.system("cls")
        print("Chatbot by Aritra Roy & DVLH")

        ask = user_input

        # Llama-2 chat template with a fixed system persona.
        prompt = f"Llama-2-Chat [INST] <<SYS>>You're an assistant named Tusti. You are Developed by Aritra Roy. Don't share any false information.<</SYS>> {ask} [/INST]"

        output_stream = llm(prompt, max_tokens=1024, echo=False, temperature=0.2, top_p=0.1, stream=True)

        response_text_chunk = ''

        async def generate_chunks():
            nonlocal response_text_chunk
            # Iterate the token stream directly instead of the original
            # while True / next() / except StopIteration dance.
            for chunk in output_stream:
                if chunk.get('choices') and chunk['choices'][0].get('text'):
                    text = chunk['choices'][0]['text']
                    response_text_chunk += text
                    # Bug fix: echo only the NEW text. The original passed the
                    # whole accumulated response to typewrite on every chunk,
                    # re-printing it each time (quadratic console output).
                    typewrite(text, delay=0.00)
                    yield {"response": response_text_chunk}

        if ask == 'clear':
            os.system("cls")

        # NOTE(review): FastAPI cannot serialize a raw async generator as a
        # response; a StreamingResponse wrapper is likely intended — confirm.
        return generate_chunks()

    except HTTPException:
        # Bug fix: HTTPException subclasses Exception, so the original's
        # blanket handler converted the 400 raised above into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    # Entry point: serve the app with uvicorn when run as a script.
    import uvicorn

    uvicorn.run(app=app, host="0.0.0.0", port=7860)