Niansuh committed on
Commit 0c07fe4 · verified · 1 Parent(s): 9281336

Update app.py

Files changed (1)
  1. app.py +34 -78
app.py CHANGED
@@ -1,11 +1,11 @@
 import json
+import sseclient
 import random
-import asyncio
 import aiohttp
-from fastapi import FastAPI, Request, Response
-from fastapi.responses import StreamingResponse
+import asyncio
+from flask import Flask, request, Response, stream_with_context
 
-app = FastAPI()
+app = Flask(__name__)
 
 def generate_random_ip():
     return f"{random.randint(1, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
@@ -43,29 +43,15 @@ def format_openai_response(content, finish_reason=None):
         }]
     }
 
-def sse_parser():
-    """Generator function to parse SSE messages."""
-    data = ''
-    while True:
-        line = yield
-        if line == '':
-            if data:
-                yield data
-                data = ''
-        elif line.startswith('data:'):
-            data += line[5:].strip()
-        else:
-            continue
-
-@app.post('/hf/v1/chat/completions')
-async def chat_completions(request: Request):
-    data = await request.json()
+@app.route('/hf/v1/chat/completions', methods=['POST'])
+async def chat_completions():
+    data = request.json
     messages = data.get('messages', [])
     stream = data.get('stream', False)
-
+
     if not messages:
         return {"error": "No messages provided"}, 400
-
+
     model = data.get('model', 'gpt-4o')
     if model.startswith('gpt'):
         endpoint = "openAI"
@@ -88,7 +74,7 @@ async def chat_completions(request: Request):
         while True:
             conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
             conversation += "\nPlease follow and reply to the user’s recent messages and avoid answers that summarize the conversation history."
-
+
             payload = {
                 "text": conversation,
                 "endpoint": endpoint,
@@ -96,65 +82,32 @@ async def chat_completions(request: Request):
             }
 
             async with aiohttp.ClientSession() as session:
-                async with session.post(original_api_url, headers=headers, json=payload) as resp:
-                    if resp.status != 200:
-                        yield f"data: {json.dumps({'error': 'Failed to connect to upstream server'})}\n\n"
-                        return
-
-                    parser = sse_parser()
-                    next(parser)  # Initialize the generator
-
-                    async for line in resp.content:
-                        line = line.decode('utf-8').strip()
-                        if line == '':
-                            continue
-
-                        parser.send(line)
-                        try:
-                            event_data = parser.send(None)
-                            if event_data:
-                                # Process the SSE event
-                                event_json = json.loads(event_data)
-                                if 'text' in event_json:
-                                    new_content = event_json['text'][len(full_response):]
-                                    full_response = event_json['text']
-                                    if new_content:
-                                        yield f"data: {json.dumps(format_openai_response(new_content))}\n\n"
-                                elif '"final":true' in event_data:
-                                    final_data = event_json
-                                    response_message = final_data.get('responseMessage', {})
-                                    finish_reason = response_message.get('finish_reason', 'stop')
-                                    if finish_reason == 'length':
-                                        messages.append({"role": "assistant", "content": full_response})
-                                        messages.append({"role": "user", "content": "Please continue your output and do not repeat the previous content"})
-                                        break  # Continue with the next request
-                                    else:
-                                        last_content = response_message.get('text', '')
-                                        if last_content and last_content != full_response:
-                                            yield f"data: {json.dumps(format_openai_response(last_content[len(full_response):]))}\n\n"
-                                        yield f"data: {json.dumps(format_openai_response('', finish_reason))}\n\n"
-                                        yield "data: [DONE]\n\n"
-                                        return
-                        except StopIteration:
-                            pass  # No complete event yet
-
-        yield f"data: {json.dumps(format_openai_response('', 'stop'))}\n\n"
-        yield "data: [DONE]\n\n"
-
+                async with session.post(original_api_url, headers=headers, json=payload) as response:
+                    async for line in response.content:
+                        if line.startswith(b'data: '):
+                            event_data = line.decode().lstrip('data: ')
+                            event = json.loads(event_data)
+                            if event.get("final"):
+                                yield f"data: {json.dumps(format_openai_response('', 'stop'))}\n\n"
+                                return
+                            new_content = event.get('text', '')[len(full_response):]
+                            if new_content:
+                                full_response = event.get('text', '')
+                                yield f"data: {json.dumps(format_openai_response(new_content))}\n\n"
+
     if stream:
-        return StreamingResponse(generate(), media_type='text/event-stream')
+        return Response(stream_with_context(generate()), content_type='text/event-stream')
     else:
        full_response = ""
        finish_reason = "stop"
        async for chunk in generate():
-            if chunk.startswith("data: ") and not chunk.strip() == "data: [DONE]":
-                response_data = json.loads(chunk[6:])
-                if 'choices' in response_data and response_data['choices']:
-                    delta = response_data['choices'][0].get('delta', {})
-                    if 'content' in delta:
-                        full_response += delta['content']
-                    if 'finish_reason' in delta:
-                        finish_reason = delta['finish_reason']
+            response_data = json.loads(chunk[6:])
+            if 'choices' in response_data and response_data['choices']:
+                delta = response_data['choices'][0].get('delta', {})
+                if 'content' in delta:
+                    full_response += delta['content']
+                if 'finish_reason' in delta:
+                    finish_reason = delta['finish_reason']
 
        return {
            "id": "chatcmpl-123",
@@ -175,3 +128,6 @@ async def chat_completions(request: Request):
                "total_tokens": 0
            }
        }
+
+if __name__ == '__main__':
+    app.run(debug=True, port=5000)
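One detail of the new streaming loop worth flagging: str.lstrip('data: ') strips any leading run of the characters d, a, t, colon, and space, not the literal prefix, so it only works here by luck because the JSON payload begins with '{'. Below is a minimal sketch of safer per-line handling for the same upstream frame shape ('text' fields plus a final frame carrying "final": true); the parse_sse_line helper is hypothetical, not part of this commit, and assumes Python 3.9+ for str.removeprefix.

import json

def parse_sse_line(line: bytes):
    """Return the decoded JSON event for a b'data: ...' frame, else None."""
    if not line.startswith(b"data: "):
        return None
    # removeprefix drops the literal "data: " prefix exactly once, whereas
    # lstrip("data: ") would strip any leading run of those five characters.
    payload = line.decode("utf-8").removeprefix("data: ").strip()
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return None  # incomplete or non-JSON frame; the caller can skip it

# Example frame in the upstream's shape: the final event carries "final": true.
event = parse_sse_line(b'data: {"final": true, "text": "Hello"}\n')
assert event is not None and event.get("final") is True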
 
 
 
 
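For completeness, a hypothetical client-side smoke test for the rewritten endpoint, assuming the app is running locally on the port set by app.run(debug=True, port=5000) and that Flask was installed with the flask[async] extra (plain Flask needs it for async def views). Note that the rewritten generate() no longer emits a data: [DONE] sentinel; it ends the stream with a chunk whose delta carries finish_reason, which is also where the non-streaming branch of the diff reads it from.

import json
import requests

resp = requests.post(
    "http://127.0.0.1:5000/hf/v1/chat/completions",
    json={
        "model": "gpt-4o",
        "stream": True,
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    stream=True,
)
for raw in resp.iter_lines():
    if not raw.startswith(b"data: "):
        continue  # skip blank keep-alive lines
    chunk = json.loads(raw.decode()[len("data: "):])
    # format_openai_response puts both content and finish_reason in the delta,
    # matching how the non-streaming aggregation loop above reads them.
    delta = chunk["choices"][0].get("delta", {})
    print(delta.get("content", ""), end="", flush=True)
    if delta.get("finish_reason"):
        break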