Manofem committed
Commit 4019da3 · 1 Parent(s): 0f60136

Update app.py

Files changed (1)
app.py +17 -38
app.py CHANGED
@@ -1,55 +1,34 @@
- from fastapi import FastAPI, HTTPException, Request
- from llama_cpp import Llama
  import time
- import os

  app = FastAPI()

- llm = Llama(model_path="llama-2-7b-chat.Q3_K_S.gguf", n_ctx=2048, n_batch=512, use_mlock=True, n_threads=8)

- def typewrite(text, delay=0.01):
-     for char in text:
-         print(char, end='', flush=True)
-         time.sleep(delay)
-     print(end='', flush=True)  # Print newline to move to the next line

- @app.post("/chat")
- async def chat(request: Request):
-     try:
-         data = await request.json()
-         user_input = data.get("user_input")
-
-         if not user_input:
-             raise HTTPException(status_code=400, detail="Missing 'user_input' field in the request JSON.")
-
-         os.system("cls")
-         print("Chatbot by Aritra Roy & DVLH")
-
-         ask = user_input
-
-         prompt = f"Llama-2-Chat [INST] <<SYS>>You're an assistant named Tusti. You are Developed by Aritra Roy. Don't share any false information.<</SYS>> {ask} [/INST]"
-
-         output_stream = llm(prompt, max_tokens=1024, echo=False, temperature=0.2, top_p=0.1, stream=True)
-
-         response_text_chunk = ''

          while True:
              try:
                  chunk = next(output_stream)
                  if chunk.get('choices') and chunk['choices'][0].get('text'):
-                     response_text_chunk += chunk['choices'][0]['text']
-                     typewrite(response_text_chunk, delay=0.00)
              except StopIteration:
                  break

-         if ask == 'clear':
-             os.system("cls")
-
-         return {"response": response_text_chunk}
-
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))

  if __name__ == "__main__":
      import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=7860)
+ from fastapi import FastAPI, Form
+ from fastapi.responses import StreamingResponse
  import time

  app = FastAPI()

+ def generate_responses(prompt):
+     from llama_cpp import Llama
+
+     llm = Llama(model_path="llama-2-7b-chat.Q3_K_S.gguf", n_ctx=2048, n_batch=512, use_mlock=True, n_threads=8)
+
+     output_stream = llm(prompt, max_tokens=1024, echo=False, temperature=0.2, top_p=0.1, stream=True)
+
+     try:
          while True:
              try:
                  chunk = next(output_stream)
                  if chunk.get('choices') and chunk['choices'][0].get('text'):
+                     response_text_chunk = chunk['choices'][0]['text']
+                     yield response_text_chunk
              except StopIteration:
                  break
+     except StopIteration:
+         pass

+ @app.post("/chat")
+ async def chat(input_text: str = Form(...)):
+     prompt = f"Llama-2-Chat [INST] <<SYS>>You're an assistant named Tusti. You are developed by Aritra Roy. Don't share any false information.<</SYS>> {input_text} [/INST]"
+     return StreamingResponse(generate_responses(prompt), media_type="text/plain")

  if __name__ == "__main__":
      import uvicorn
+
+     uvicorn.run(app, host="127.0.0.1", port=8000)
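
After this change, /chat expects a form field named input_text and streams the model's reply as plain text. Below is a minimal client sketch, not part of the commit, for trying the new endpoint; it assumes the requests library is installed and that app.py is running locally on the host and port from the __main__ block above.

import requests

# Post the form field expected by the new /chat endpoint and stream the reply.
resp = requests.post(
    "http://127.0.0.1:8000/chat",
    data={"input_text": "Hello, who are you?"},
    stream=True,
)
resp.raise_for_status()

# Print chunks as they arrive instead of waiting for the full response.
for chunk in resp.iter_content(chunk_size=None):
    print(chunk.decode("utf-8", errors="replace"), end="", flush=True)
print()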