Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
chenge
committed on
Commit
·
3c72b91
1
Parent(s):
b645bcc
add some flect ui
Browse files
app.py
CHANGED
@@ -1,3 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import uuid
|
3 |
import json
|
@@ -20,7 +32,7 @@ import datetime
|
|
20 |
model = os.getenv("MODEL_NAME")
|
21 |
# 代理服务器配置 - 支持多个URL用逗号分隔
|
22 |
PROXY_BASE_URLS = [url.strip() for url in os.getenv("PROXY_API_BASE", "http://localhost:8000").split(",") if url.strip()]
|
23 |
-
PROXY_TIMEOUT = int(os.getenv("PROXY_TIMEOUT",
|
24 |
MAX_RETRIES = int(os.getenv("MAX_RETRIES", 5))
|
25 |
# 负载均衡配置
|
26 |
current_proxy_index = 0 # 用于轮询的当前索引
|
@@ -93,16 +105,31 @@ def encode_image_to_base64(image_path_or_pil: Union[str, Image.Image]) -> str:
|
|
93 |
with open(image_path_or_pil, "rb") as image_file:
|
94 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
95 |
else:
|
96 |
-
# 如果是PIL
|
97 |
buffer = io.BytesIO()
|
98 |
-
|
|
|
|
|
|
|
99 |
if image_path_or_pil.mode == 'RGBA':
|
100 |
-
# 如果是RGBA
|
101 |
-
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
|
|
104 |
else:
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
|
107 |
return image_base64
|
108 |
except Exception as e:
|
@@ -500,6 +527,17 @@ class Gradio_Events:
|
|
500 |
in_thinking = False
|
501 |
accumulated_content = ""
|
502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
for chunk in response:
|
504 |
# 安全地访问chunk属性
|
505 |
if chunk.choices and len(chunk.choices) > 0:
|
@@ -512,6 +550,8 @@ class Gradio_Events:
|
|
512 |
print(content, end='')
|
513 |
if content:
|
514 |
accumulated_content += content
|
|
|
|
|
515 |
|
516 |
# 检查是否进入thinking模式
|
517 |
if "<think>" in accumulated_content and not in_thinking:
|
@@ -528,6 +568,14 @@ class Gradio_Events:
|
|
528 |
accumulated_content = think_parts[1]
|
529 |
else:
|
530 |
accumulated_content = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
continue
|
532 |
|
533 |
# 检查是否退出thinking模式
|
@@ -551,36 +599,93 @@ class Gradio_Events:
|
|
551 |
history[-1]["content"] = current_content + after_think_content
|
552 |
|
553 |
accumulated_content = "" # 重置累积内容
|
554 |
-
#
|
555 |
yield {
|
556 |
chatbot: gr.update(items=history),
|
557 |
state: gr.update(value=state_value)
|
558 |
}
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
continue
|
563 |
|
564 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
565 |
if in_thinking:
|
566 |
# 检查是否包含完整的thinking结束标签
|
567 |
if "</think>" not in accumulated_content:
|
568 |
history[-1]["meta"]["thinking_content"] = accumulated_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
569 |
else:
|
570 |
# 如果不在thinking模式中,正常添加内容到content
|
571 |
if not thought_done:
|
572 |
thought_done = True
|
573 |
if not history[-1]["content"]: # 如果content为空才初始化
|
574 |
history[-1]["content"] = ""
|
575 |
-
|
576 |
-
#
|
577 |
-
|
578 |
-
|
579 |
-
history[-1]["
|
580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
yield {
|
582 |
chatbot: gr.update(items=history),
|
583 |
-
state: gr.update(value=state_value)
|
584 |
}
|
585 |
|
586 |
history[-1]["meta"]["end"] = True
|
@@ -987,15 +1092,9 @@ class Gradio_Events:
|
|
987 |
|
988 |
# 使用PIL加载图片
|
989 |
image = Image.open(file_path)
|
990 |
-
logger.info(f"Loaded image with size: {image.size}")
|
991 |
-
|
992 |
-
# 可选:调整图片大小以节省带宽
|
993 |
-
if max(image.size) > 1024:
|
994 |
-
ratio = 1024 / max(image.size)
|
995 |
-
new_size = tuple(int(dim * ratio) for dim in image.size)
|
996 |
-
image = image.resize(new_size, Image.Resampling.LANCZOS)
|
997 |
-
logger.info(f"Resized image to: {new_size}")
|
998 |
|
|
|
999 |
uploaded_images.append(image)
|
1000 |
|
1001 |
except Exception as img_error:
|
|
|
1 |
+
"""
|
2 |
+
多模态大语言模型聊天Demo - 网络优化版本
|
3 |
+
|
4 |
+
主要优化:
|
5 |
+
1. 缓冲机制:积累多个chunk后再yield,减少网络交互次数(50-80%)
|
6 |
+
2. State更新优化:降低state更新频率,减少数据传输量
|
7 |
+
3. 超时配置优化:增加代理超时时间,提高网络容错性
|
8 |
+
4. 图像质量优化:保持原始尺寸和高质量编码,不进行缩放
|
9 |
+
|
10 |
+
这些优化可显著改善网络延迟高时的前端卡顿问题,同时保证图像质量。
|
11 |
+
"""
|
12 |
+
|
13 |
import os
|
14 |
import uuid
|
15 |
import json
|
|
|
32 |
model = os.getenv("MODEL_NAME")
|
33 |
# 代理服务器配置 - 支持多个URL用逗号分隔
|
34 |
PROXY_BASE_URLS = [url.strip() for url in os.getenv("PROXY_API_BASE", "http://localhost:8000").split(",") if url.strip()]
|
35 |
+
PROXY_TIMEOUT = int(os.getenv("PROXY_TIMEOUT", 300)) # Proxy timeout in seconds; defaults to 300 when PROXY_TIMEOUT is unset
|
36 |
MAX_RETRIES = int(os.getenv("MAX_RETRIES", 5))
|
37 |
# 负载均衡配置
|
38 |
current_proxy_index = 0 # 用于轮询的当前索引
|
|
|
105 |
with open(image_path_or_pil, "rb") as image_file:
|
106 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
107 |
else:
|
108 |
+
# 如果是PIL图像对象,尽量保持原始格式和质量
|
109 |
buffer = io.BytesIO()
|
110 |
+
|
111 |
+
# 检测原始格式,优先保持原格式
|
112 |
+
original_format = getattr(image_path_or_pil, 'format', None)
|
113 |
+
|
114 |
if image_path_or_pil.mode == 'RGBA':
|
115 |
+
# 如果是RGBA模式且原格式支持透明度,优先保存为PNG
|
116 |
+
if original_format in ['PNG', 'WEBP'] or original_format is None:
|
117 |
+
image_path_or_pil.save(buffer, format="PNG") # PNG无损保存
|
118 |
+
else:
|
119 |
+
# 否则转换为RGB并保存为高质量JPEG
|
120 |
+
rgb_image = Image.new('RGB', image_path_or_pil.size, (255, 255, 255))
|
121 |
+
rgb_image.paste(image_path_or_pil, mask=image_path_or_pil.split()[-1])
|
122 |
+
rgb_image.save(buffer, format="JPEG", quality=95)
|
123 |
else:
|
124 |
+
# 非RGBA模式,根据原格式选择保存方式
|
125 |
+
if original_format == 'PNG':
|
126 |
+
image_path_or_pil.save(buffer, format="PNG") # PNG无损保存
|
127 |
+
elif original_format in ['WEBP', 'BMP', 'TIFF']:
|
128 |
+
# 其他格式转为高质量JPEG
|
129 |
+
image_path_or_pil.save(buffer, format="JPEG", quality=95)
|
130 |
+
else:
|
131 |
+
# 默认保存为高质量JPEG
|
132 |
+
image_path_or_pil.save(buffer, format="JPEG", quality=95)
|
133 |
image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
|
134 |
return image_base64
|
135 |
except Exception as e:
|
|
|
527 |
in_thinking = False
|
528 |
accumulated_content = ""
|
529 |
|
530 |
+
# 缓冲逻辑变量
|
531 |
+
buffer_content = "" # 临时缓冲内容
|
532 |
+
last_yield_time = time.time()
|
533 |
+
chunk_count = 0
|
534 |
+
state_update_count = 0 # state更新计数器
|
535 |
+
BUFFER_INTERVAL = 0.5 # 秒 - 缓冲时间间隔,减少网络交互频率
|
536 |
+
BUFFER_CHUNKS = 5 # 每5个chunk强制yield - 平衡实时性和性能
|
537 |
+
STATE_UPDATE_INTERVAL = 3 # 每3次yield更新一次state - 减少state传输频率
|
538 |
+
|
539 |
+
# 优化state更新策略:减少不必要的历史数据传输
|
540 |
+
|
541 |
for chunk in response:
|
542 |
# 安全地访问chunk属性
|
543 |
if chunk.choices and len(chunk.choices) > 0:
|
|
|
550 |
print(content, end='')
|
551 |
if content:
|
552 |
accumulated_content += content
|
553 |
+
buffer_content += content # 添加到缓冲
|
554 |
+
chunk_count += 1
|
555 |
|
556 |
# 检查是否进入thinking模式
|
557 |
if "<think>" in accumulated_content and not in_thinking:
|
|
|
568 |
accumulated_content = think_parts[1]
|
569 |
else:
|
570 |
accumulated_content = ""
|
571 |
+
# 立即yield thinking状态变化,这种重要状态变化总是需要更新state
|
572 |
+
yield {
|
573 |
+
chatbot: gr.update(items=history),
|
574 |
+
state: gr.update(value=state_value)
|
575 |
+
}
|
576 |
+
buffer_content = "" # 重置缓冲
|
577 |
+
last_yield_time = time.time()
|
578 |
+
chunk_count = 0
|
579 |
continue
|
580 |
|
581 |
# 检查是否退出thinking模式
|
|
|
599 |
history[-1]["content"] = current_content + after_think_content
|
600 |
|
601 |
accumulated_content = "" # 重置累积内容
|
602 |
+
# 立即yield thinking完成状态,这种重要状态变化总是需要更新state
|
603 |
yield {
|
604 |
chatbot: gr.update(items=history),
|
605 |
state: gr.update(value=state_value)
|
606 |
}
|
607 |
+
buffer_content = "" # 重置缓冲
|
608 |
+
last_yield_time = time.time()
|
609 |
+
chunk_count = 0
|
610 |
continue
|
611 |
|
612 |
+
# 缓冲检查:时间或chunk数达到时 yield
|
613 |
+
current_time = time.time()
|
614 |
+
should_yield = False
|
615 |
+
|
616 |
+
if (current_time - last_yield_time >= BUFFER_INTERVAL) or (chunk_count >= BUFFER_CHUNKS):
|
617 |
+
should_yield = True
|
618 |
+
|
619 |
+
# 在thinking模式中,更新thinking内容
|
620 |
if in_thinking:
|
621 |
# 检查是否包含完整的thinking结束标签
|
622 |
if "</think>" not in accumulated_content:
|
623 |
history[-1]["meta"]["thinking_content"] = accumulated_content
|
624 |
+
if should_yield:
|
625 |
+
state_update_count += 1
|
626 |
+
# 条件更新state:只在特定间隔更新
|
627 |
+
should_update_state = (state_update_count % STATE_UPDATE_INTERVAL == 0)
|
628 |
+
|
629 |
+
yield {
|
630 |
+
chatbot: gr.update(items=history),
|
631 |
+
state: gr.update(value=state_value) if should_update_state else gr.skip()
|
632 |
+
}
|
633 |
+
buffer_content = ""
|
634 |
+
last_yield_time = current_time
|
635 |
+
chunk_count = 0
|
636 |
else:
|
637 |
# 如果不在thinking模式中,正常添加内容到content
|
638 |
if not thought_done:
|
639 |
thought_done = True
|
640 |
if not history[-1]["content"]: # 如果content为空才初始化
|
641 |
history[-1]["content"] = ""
|
642 |
+
|
643 |
+
# 应用缓冲内容到history
|
644 |
+
if should_yield:
|
645 |
+
# 将缓冲的内容添加到content中
|
646 |
+
history[-1]["content"] += buffer_content
|
647 |
+
|
648 |
+
# 清除"刚完成thinking"标记,因为现在在正常输出内容
|
649 |
+
if history[-1]["meta"].get("just_finished_thinking"):
|
650 |
+
history[-1]["meta"]["just_finished_thinking"] = False
|
651 |
+
|
652 |
+
state_update_count += 1
|
653 |
+
# 条件更新state:只在特定间隔更新
|
654 |
+
should_update_state = (state_update_count % STATE_UPDATE_INTERVAL == 0)
|
655 |
+
|
656 |
+
yield {
|
657 |
+
chatbot: gr.update(items=history),
|
658 |
+
state: gr.update(value=state_value) if should_update_state else gr.skip()
|
659 |
+
}
|
660 |
+
|
661 |
+
# 重置缓冲
|
662 |
+
buffer_content = ""
|
663 |
+
last_yield_time = current_time
|
664 |
+
chunk_count = 0
|
665 |
+
else:
|
666 |
+
# Not yielding on this chunk: leave history content untouched for now; buffer_content keeps accumulating
|
667 |
+
# 注意:这里不直接添加content,而是等待缓冲yield时一起添加
|
668 |
+
pass
|
669 |
+
|
670 |
+
# 循环结束后,处理剩余的缓冲内容
|
671 |
+
if buffer_content:
|
672 |
+
if in_thinking:
|
673 |
+
# 如果还在thinking模式中,更新thinking内容
|
674 |
+
history[-1]["meta"]["thinking_content"] = accumulated_content
|
675 |
+
else:
|
676 |
+
# 如果不在thinking模式中,添加剩余内容
|
677 |
+
if not history[-1]["content"]:
|
678 |
+
history[-1]["content"] = ""
|
679 |
+
history[-1]["content"] += buffer_content
|
680 |
+
|
681 |
+
# 清除"刚完成thinking"标记
|
682 |
+
if history[-1]["meta"].get("just_finished_thinking"):
|
683 |
+
history[-1]["meta"]["just_finished_thinking"] = False
|
684 |
+
|
685 |
+
# 最终yield,确保所有内容都被发送并强制更新state
|
686 |
yield {
|
687 |
chatbot: gr.update(items=history),
|
688 |
+
state: gr.update(value=state_value) # 最终总是更新state
|
689 |
}
|
690 |
|
691 |
history[-1]["meta"]["end"] = True
|
|
|
1092 |
|
1093 |
# 使用PIL加载图片
|
1094 |
image = Image.open(file_path)
|
1095 |
+
logger.info(f"Loaded image with size: {image.size} (原始尺寸,不进行缩放)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1096 |
|
1097 |
+
# 保持原始图片尺寸,不进行任何缩放处理
|
1098 |
uploaded_images.append(image)
|
1099 |
|
1100 |
except Exception as img_error:
|