Spaces:

AIMaster7
/

Mono

Running

App Files Community

AIMaster7 committed 8 days ago

Commit

3f3c9af

verified ·

1 Parent(s): 374cc4b

Update main.py

Browse files

Files changed (1) hide show

main.py +53 -66

main.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
-import requests
 import time
 import json
-from typing import List, Optional
-from models import AVAILABLE_MODELS
 app = FastAPI()
@@ -27,6 +27,8 @@ async def list_models():
 @app.post("/v1/chat/completions")
 async def chat_completion(request: ChatRequest):
     headers = {
         'accept': 'text/event-stream',
         'content-type': 'application/json',
@@ -37,96 +39,81 @@ async def chat_completion(request: ChatRequest):
     payload = {
         "messages": [{"role": msg.role, "content": msg.content} for msg in request.messages],
-        "model": request.model
     }
     if request.stream:
-        def event_stream():
             chat_id = f"chatcmpl-{unix_id()}"
             created = int(time.time())
             sent_done = False
-            with requests.post(
-                "https://www.chatwithmono.xyz/api/chat",
-                headers=headers,
-                json=payload,
-                stream=True,
-                timeout=120
-            ) as response:
-                for line in response.iter_lines(decode_unicode=True):
-                    if line.startswith("0:"):
-                        try:
-                            content_piece = json.loads(line[2:])
-                            chunk_data = {
-                                "id": chat_id,
-                                "object": "chat.completion.chunk",
-                                "created": created,
-                                "model": request.model,
-                                "choices": [
-                                    {
                                         "delta": {"content": content_piece},
                                         "index": 0,
                                         "finish_reason": None
-                                    }
-                                ]
-                            }
-                            yield f"data: {json.dumps(chunk_data)}\n\n"
-                        except:
-                            continue
-                    elif line.startswith(("e:", "d:")) and not sent_done:
-                        sent_done = True
-                        done_chunk = {
-                            "id": chat_id,
-                            "object": "chat.completion.chunk",
-                            "created": created,
-                            "model": request.model,
-                            "choices": [
-                                {
                                     "delta": {},
                                     "index": 0,
                                     "finish_reason": "stop"
-                                }
-                            ]
-                        }
-                        yield f"data: {json.dumps(done_chunk)}\n\ndata: [DONE]\n\n"
         return StreamingResponse(event_stream(), media_type="text/event-stream")
     else:
         assistant_response = ""
         usage_info = {}
-        with requests.post(
-            "https://www.chatwithmono.xyz/api/chat",
-            headers=headers,
-            json=payload,
-            stream=True,
-            timeout=120
-        ) as response:
-            for chunk in response.iter_lines(decode_unicode=True):
-                if chunk.startswith("0:"):
-                    try:
-                        piece = json.loads(chunk[2:])
-                        assistant_response += piece  # this is just a string fragment
-                    except:
-                        continue
-                elif chunk.startswith(("e:", "d:")):
-                    try:
-                        data = json.loads(chunk[2:])
-                        usage_info = data.get("usage", {})
-                    except:
-                        continue
         return JSONResponse(content={
             "id": f"chatcmpl-{unix_id()}",
             "object": "chat.completion",
             "created": int(time.time()),
-            "model": request.model,
             "choices": [{
                 "index": 0,
                 "message": {
                     "role": "assistant",
-                    "content": assistant_response  # correctly concatenated string
                 },
                 "finish_reason": "stop"
             }],
@@ -135,4 +122,4 @@ async def chat_completion(request: ChatRequest):
                 "completion_tokens": usage_info.get("completionTokens", 0),
                 "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
             }
-        })

 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
+from typing import List, Optional
 import time
 import json
+import httpx
+from models import AVAILABLE_MODELS, MODEL_ALIASES
 app = FastAPI()
 @app.post("/v1/chat/completions")
 async def chat_completion(request: ChatRequest):
+    model_id = MODEL_ALIASES.get(request.model, request.model)
     headers = {
         'accept': 'text/event-stream',
         'content-type': 'application/json',
     payload = {
         "messages": [{"role": msg.role, "content": msg.content} for msg in request.messages],
+        "model": model_id
     }
     if request.stream:
+        async def event_stream():
             chat_id = f"chatcmpl-{unix_id()}"
             created = int(time.time())
             sent_done = False
+            async with httpx.AsyncClient(timeout=120) as client:
+                async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
+                    async for line in response.aiter_lines():
+                        if line.startswith("0:"):
+                            try:
+                                content_piece = json.loads(line[2:])
+                                chunk_data = {
+                                    "id": chat_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": created,
+                                    "model": model_id,
+                                    "choices": [{
                                         "delta": {"content": content_piece},
                                         "index": 0,
                                         "finish_reason": None
+                                    }]
+                                }
+                                yield f"data: {json.dumps(chunk_data)}\n\n"
+                            except:
+                                continue
+                        elif line.startswith(("e:", "d:")) and not sent_done:
+                            sent_done = True
+                            done_chunk = {
+                                "id": chat_id,
+                                "object": "chat.completion.chunk",
+                                "created": created,
+                                "model": model_id,
+                                "choices": [{
                                     "delta": {},
                                     "index": 0,
                                     "finish_reason": "stop"
+                                }]
+                            }
+                            yield f"data: {json.dumps(done_chunk)}\n\ndata: [DONE]\n\n"
         return StreamingResponse(event_stream(), media_type="text/event-stream")
     else:
         assistant_response = ""
         usage_info = {}
+        async with httpx.AsyncClient(timeout=120) as client:
+            async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
+                async for chunk in response.aiter_lines():
+                    if chunk.startswith("0:"):
+                        try:
+                            piece = json.loads(chunk[2:])
+                            assistant_response += piece
+                        except:
+                            continue
+                    elif chunk.startswith(("e:", "d:")):
+                        try:
+                            data = json.loads(chunk[2:])
+                            usage_info = data.get("usage", {})
+                        except:
+                            continue
         return JSONResponse(content={
             "id": f"chatcmpl-{unix_id()}",
             "object": "chat.completion",
             "created": int(time.time()),
+            "model": model_id,
             "choices": [{
                 "index": 0,
                 "message": {
                     "role": "assistant",
+                    "content": assistant_response
                 },
                 "finish_reason": "stop"
             }],
                 "completion_tokens": usage_info.get("completionTokens", 0),
                 "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
             }
+        })