File size: 2,016 Bytes
8c77d9c
f86bd34
9449955
966212c
83c6483
e318921
9449955
 
 
 
966212c
 
 
 
 
9449955
966212c
8c77d9c
966212c
8c77d9c
 
 
 
 
 
966212c
 
9449955
966212c
69a8774
966212c
 
 
 
290f8e2
f86bd34
 
966212c
290f8e2
 
 
 
 
f86bd34
966212c
290f8e2
966212c
 
 
9449955
f86bd34
9449955
966212c
166ed2c
 
9449955
 
 
8c77d9c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from fastapi import FastAPI, HTTPException, Request
# NOTE(review): fastapi.responses does not provide EventSourceResponse — this
# import will raise ImportError. The class normally comes from
# sse_starlette.sse (`from sse_starlette.sse import EventSourceResponse`);
# confirm the intended dependency.
from fastapi.responses import EventSourceResponse
from llama_cpp import Llama
import time
import os

# FastAPI application instance served by uvicorn (see __main__ guard below).
app = FastAPI()

# Load the quantized Llama-2 7B chat model once at import time.
# NOTE(review): this is a blocking load and the model file is resolved
# relative to the current working directory — confirm deployment layout.
llm = Llama(model_path="llama-2-7b-chat.Q3_K_S.gguf", n_ctx=2048, n_batch=512, use_mlock=True, n_threads=8)

def typewrite(text, delay=0.01):
    """Print *text* to stdout one character at a time, typewriter-style.

    Args:
        text: String to print.
        delay: Seconds to sleep between characters; 0 disables the effect.
    """
    for char in text:
        print(char, end='', flush=True)
        time.sleep(delay)
    # BUG FIX: the original ended with print(end='', flush=True), a no-op
    # whose comment falsely claimed it printed a newline. Removed; the
    # function intentionally emits no trailing newline (callers stream
    # consecutive chunks onto one line).

@app.post("/chat")
async def chat(request: Request):
    """Stream a Llama-2 chat completion for the request's 'user_input' as SSE.

    Expects a JSON body with a non-empty "user_input" field.

    Raises:
        HTTPException: 400 when 'user_input' is missing, 500 on any other
            internal failure.
    """
    try:
        data = await request.json()
        user_input = data.get("user_input")

        if not user_input:
            raise HTTPException(status_code=400, detail="Missing 'user_input' field in the request JSON.")

        # NOTE(review): "cls" is Windows-only; POSIX uses "clear" — confirm
        # the target platform before relying on this.
        os.system("cls")
        print("Chatbot by Aritra Roy & DVLH")

        ask = user_input

        prompt = f"Llama-2-Chat [INST] <<SYS>>You're an assistant named Tusti. You are Developed by Aritra Roy. Don't share any false information.<</SYS>> {ask} [/INST]"

        output_stream = llm(prompt, max_tokens=1024, echo=False, temperature=0.2, top_p=0.1, stream=True)

        async def generate_chunks():
            # BUG FIX: the original accumulated every token into one growing
            # buffer and re-printed / re-yielded the ENTIRE buffer per chunk,
            # so the client received quadratically duplicated text. Emit only
            # the new delta for each chunk.
            # NOTE(review): iterating the llama-cpp stream here is synchronous
            # and blocks the event loop between tokens — consider
            # run_in_executor if concurrency matters.
            for chunk in output_stream:
                if chunk.get('choices') and chunk['choices'][0].get('text'):
                    delta = chunk['choices'][0]['text']
                    typewrite(delta, delay=0.00)
                    yield f"data: {delta}\n\n"

        if ask == 'clear':
            os.system("cls")

        return EventSourceResponse(generate_chunks())

    except HTTPException:
        # BUG FIX: the original broad handler swallowed the deliberate 400
        # above and re-raised it as a 500. Let HTTPException pass through.
        raise
    except Exception as e:
        print(f"Exception: {e}")
        raise HTTPException(status_code=500, detail="Internal Server Error")

if __name__ == "__main__":
    # Run a local development server on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(app=app, host="0.0.0.0", port=7860)