Niansuh committed on
Commit
856118e
·
verified ·
1 Parent(s): 5184ad9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -34
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import json
2
- import sseclient
3
  import random
4
- import aiohttp
5
  import asyncio
6
- from flask import Flask, request, Response, stream_with_context
 
 
7
 
8
- app = Flask(__name__)
9
 
10
  def generate_random_ip():
11
  return f"{random.randint(1, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
@@ -43,15 +43,44 @@ def format_openai_response(content, finish_reason=None):
43
  }]
44
  }
45
 
46
- @app.route('/hf/v1/chat/completions', methods=['POST'])
47
- async def chat_completions():
48
- data = request.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  messages = data.get('messages', [])
50
  stream = data.get('stream', False)
51
-
52
  if not messages:
53
  return {"error": "No messages provided"}, 400
54
-
55
  model = data.get('model', 'gpt-4o')
56
  if model.startswith('gpt'):
57
  endpoint = "openAI"
@@ -74,7 +103,7 @@ async def chat_completions():
74
  while True:
75
  conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
76
  conversation += "\nPlease follow and reply to the user’s recent messages and avoid answers that summarize the conversation history."
77
-
78
  payload = {
79
  "text": conversation,
80
  "endpoint": endpoint,
@@ -82,32 +111,52 @@ async def chat_completions():
82
  }
83
 
84
  async with aiohttp.ClientSession() as session:
85
- async with session.post(original_api_url, headers=headers, json=payload) as response:
86
- async for line in response.content:
87
- if line.startswith(b'data: '):
88
- event_data = line.decode().lstrip('data: ')
89
- event = json.loads(event_data)
90
- if event.get("final"):
91
- yield f"data: {json.dumps(format_openai_response('', 'stop'))}\n\n"
92
- return
93
- new_content = event.get('text', '')[len(full_response):]
94
- if new_content:
95
- full_response = event.get('text', '')
96
- yield f"data: {json.dumps(format_openai_response(new_content))}\n\n"
97
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  if stream:
99
- return Response(stream_with_context(generate()), content_type='text/event-stream')
100
  else:
101
  full_response = ""
102
  finish_reason = "stop"
103
  async for chunk in generate():
104
- response_data = json.loads(chunk[6:])
105
- if 'choices' in response_data and response_data['choices']:
106
- delta = response_data['choices'][0].get('delta', {})
107
- if 'content' in delta:
108
- full_response += delta['content']
109
- if 'finish_reason' in delta:
110
- finish_reason = delta['finish_reason']
 
111
 
112
  return {
113
  "id": "chatcmpl-123",
@@ -128,6 +177,3 @@ async def chat_completions():
128
  "total_tokens": 0
129
  }
130
  }
131
-
132
- if __name__ == '__main__':
133
- app.run(debug=True, port=5000)
 
1
  import json
 
2
  import random
 
3
  import asyncio
4
+ import aiohttp
5
+ from fastapi import FastAPI, Request, Response
6
+ from fastapi.responses import StreamingResponse
7
 
8
+ app = FastAPI()
9
 
10
def generate_random_ip():
    """Return a random dotted-quad IPv4 string (first octet 1-255, rest 0-255)."""
    octets = [random.randint(1, 255)] + [random.randint(0, 255) for _ in range(3)]
    return ".".join(str(octet) for octet in octets)
 
43
  }]
44
  }
45
 
46
async def parse_sse(response):
    """
    Parse Server-Sent Events from an aiohttp response body.

    Yields one dict per event, mapping field names (e.g. 'data') to their
    values; repeated fields within one event are joined with newlines.
    Comment lines (starting with ':') are ignored; a blank line ends the
    current event, and any trailing unterminated event is flushed at EOF.
    """
    current = {}
    async for raw in response.content:
        text = raw.decode('utf-8').strip()
        if not text:
            # Blank line marks the end of an event.
            if current:
                yield current
                current = {}
        elif text.startswith(':'):
            # SSE comment line — skip it.
            pass
        else:
            if ':' in text:
                field, _, val = text.partition(':')
                val = val.lstrip()
            else:
                field, val = text, ''
            current[field] = current[field] + '\n' + val if field in current else val
    # Flush a final event that was not followed by a blank line.
    if current:
        yield current
74
+
75
+ @app.post('/hf/v1/chat/completions')
76
+ async def chat_completions(request: Request):
77
+ data = await request.json()
78
  messages = data.get('messages', [])
79
  stream = data.get('stream', False)
80
+
81
  if not messages:
82
  return {"error": "No messages provided"}, 400
83
+
84
  model = data.get('model', 'gpt-4o')
85
  if model.startswith('gpt'):
86
  endpoint = "openAI"
 
103
  while True:
104
  conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
105
  conversation += "\nPlease follow and reply to the user’s recent messages and avoid answers that summarize the conversation history."
106
+
107
  payload = {
108
  "text": conversation,
109
  "endpoint": endpoint,
 
111
  }
112
 
113
  async with aiohttp.ClientSession() as session:
114
+ async with session.post(original_api_url, headers=headers, json=payload) as resp:
115
+ if resp.status != 200:
116
+ yield f"data: {json.dumps({'error': 'Failed to connect to upstream server'})}\n\n"
117
+ return
118
+
119
+ async for event in parse_sse(resp):
120
+ if 'data' in event:
121
+ data = event['data']
122
+ if data.startswith('{"text":'):
123
+ data_json = json.loads(data)
124
+ new_content = data_json['text'][len(full_response):]
125
+ full_response = data_json['text']
126
+ if new_content:
127
+ yield f"data: {json.dumps(format_openai_response(new_content))}\n\n"
128
+ elif '"final":true' in data:
129
+ final_data = json.loads(data)
130
+ response_message = final_data.get('responseMessage', {})
131
+ finish_reason = response_message.get('finish_reason', 'stop')
132
+ if finish_reason == 'length':
133
+ messages.append({"role": "assistant", "content": full_response})
134
+ messages.append({"role": "user", "content": "Please continue your output and do not repeat the previous content"})
135
+ break # Continue with the next request
136
+ else:
137
+ last_content = response_message.get('text', '')
138
+ if last_content and last_content != full_response:
139
+ yield f"data: {json.dumps(format_openai_response(last_content[len(full_response):]))}\n\n"
140
+ yield f"data: {json.dumps(format_openai_response('', finish_reason))}\n\n"
141
+ yield "data: [DONE]\n\n"
142
+ return
143
+ yield f"data: {json.dumps(format_openai_response('', 'stop'))}\n\n"
144
+ yield "data: [DONE]\n\n"
145
+
146
  if stream:
147
+ return StreamingResponse(generate(), media_type='text/event-stream')
148
  else:
149
  full_response = ""
150
  finish_reason = "stop"
151
  async for chunk in generate():
152
+ if chunk.startswith("data: ") and not chunk.strip() == "data: [DONE]":
153
+ response_data = json.loads(chunk[6:])
154
+ if 'choices' in response_data and response_data['choices']:
155
+ delta = response_data['choices'][0].get('delta', {})
156
+ if 'content' in delta:
157
+ full_response += delta['content']
158
+ if 'finish_reason' in delta:
159
+ finish_reason = delta['finish_reason']
160
 
161
  return {
162
  "id": "chatcmpl-123",
 
177
  "total_tokens": 0
178
  }
179
  }