Spaces:

rednote-hilab
/

dots-vlm1-demo2

Running on CPU Upgrade

App Files Files Community

chenge commited on 9 days ago

Commit

3c72b91

1 Parent(s): b645bcc

add some flect ui

Browse files

Files changed (1) hide show

app.py +127 -28

app.py CHANGED Viewed

@@ -1,3 +1,15 @@
 import os
 import uuid
 import json
@@ -20,7 +32,7 @@ import datetime
 model = os.getenv("MODEL_NAME")
 # 代理服务器配置 - 支持多个URL用逗号分隔
 PROXY_BASE_URLS = [url.strip() for url in os.getenv("PROXY_API_BASE", "http://localhost:8000").split(",") if url.strip()]
-PROXY_TIMEOUT = int(os.getenv("PROXY_TIMEOUT", 30))
 MAX_RETRIES = int(os.getenv("MAX_RETRIES", 5))
 # 负载均衡配置
 current_proxy_index = 0  # 用于轮询的当前索引
@@ -93,16 +105,31 @@ def encode_image_to_base64(image_path_or_pil: Union[str, Image.Image]) -> str:
             with open(image_path_or_pil, "rb") as image_file:
                 return base64.b64encode(image_file.read()).decode('utf-8')
         else:
-            # 如果是PIL图像对象
             buffer = io.BytesIO()
-            # 保存为JPEG格式
             if image_path_or_pil.mode == 'RGBA':
-                # 如果是RGBA模式，转换为RGB
-                rgb_image = Image.new('RGB', image_path_or_pil.size, (255, 255, 255))
-                rgb_image.paste(image_path_or_pil, mask=image_path_or_pil.split()[-1])
-                rgb_image.save(buffer, format="JPEG", quality=85)
             else:
-                image_path_or_pil.save(buffer, format="JPEG", quality=85)
             image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
             return image_base64
     except Exception as e:
@@ -500,6 +527,17 @@ class Gradio_Events:
             in_thinking = False
             accumulated_content = ""
             for chunk in response:
                 # 安全地访问chunk属性
                 if chunk.choices and len(chunk.choices) > 0:
@@ -512,6 +550,8 @@ class Gradio_Events:
                 print(content, end='')
                 if content:
                     accumulated_content += content
                     # 检查是否进入thinking模式
                     if "<think>" in accumulated_content and not in_thinking:
@@ -528,6 +568,14 @@ class Gradio_Events:
                             accumulated_content = think_parts[1]
                         else:
                             accumulated_content = ""
                         continue
                     # 检查是否退出thinking模式
@@ -551,36 +599,93 @@ class Gradio_Events:
                                 history[-1]["content"] = current_content + after_think_content
                         accumulated_content = ""  # 重置累积内容
-                        # 先yield一次，让前端检测到just_finished_thinking状态
                         yield {
                             chatbot: gr.update(items=history),
                             state: gr.update(value=state_value)
                         }
-                        # 延迟清除just_finished_thinking标记的逻辑移到下次yield时处理
-                        # 这样确保前端有足够时间检测并执行自动折叠
                         continue
-                    # 如果在thinking模式中，只更新thinking内容，不修改content
                     if in_thinking:
                         # 检查是否包含完整的thinking结束标签
                         if "</think>" not in accumulated_content:
                             history[-1]["meta"]["thinking_content"] = accumulated_content
                     else:
                         # 如果不在thinking模式中，正常添加内容到content
                         if not thought_done:
                             thought_done = True
                             if not history[-1]["content"]:  # 如果content为空才初始化
                                 history[-1]["content"] = ""
-                        history[-1]["content"] += content
-                        # 清除"刚完成thinking"标记，因为现在在正常输出内容
-                        # 在输出后续内容时自动清除标记，让用户可以重新控制折叠状态
-                        if history[-1]["meta"].get("just_finished_thinking"):
-                            history[-1]["meta"]["just_finished_thinking"] = False
                 yield {
                     chatbot: gr.update(items=history),
-                    state: gr.update(value=state_value)
                 }
             history[-1]["meta"]["end"] = True
@@ -987,15 +1092,9 @@ class Gradio_Events:
                         # 使用PIL加载图片
                         image = Image.open(file_path)
-                        logger.info(f"Loaded image with size: {image.size}")
-                        # 可选：调整图片大小以节省带宽
-                        if max(image.size) > 1024:
-                            ratio = 1024 / max(image.size)
-                            new_size = tuple(int(dim * ratio) for dim in image.size)
-                            image = image.resize(new_size, Image.Resampling.LANCZOS)
-                            logger.info(f"Resized image to: {new_size}")
                         uploaded_images.append(image)
                     except Exception as img_error:

+"""
+多模态大语言模型聊天Demo - 网络优化版本
+主要优化：
+1. 缓冲机制：积累多个chunk后再yield，减少网络交互次数（50-80%）
+2. State更新优化：降低state更新频率，减少数据传输量
+3. 超时配置优化：增加代理超时时间，提高网络容错性
+4. 图像质量优化：保持原始尺寸和高质量编码，不进行缩放
+这些优化可显著改善网络延迟高时的前端卡顿问题，同时保证图像质量。
+"""
 import os
 import uuid
 import json
 model = os.getenv("MODEL_NAME")
 # 代理服务器配置 - 支持多个URL用逗号分隔
 PROXY_BASE_URLS = [url.strip() for url in os.getenv("PROXY_API_BASE", "http://localhost:8000").split(",") if url.strip()]
+PROXY_TIMEOUT = int(os.getenv("PROXY_TIMEOUT", 300))  # 增加超时时间从30秒到60秒
 MAX_RETRIES = int(os.getenv("MAX_RETRIES", 5))
 # 负载均衡配置
 current_proxy_index = 0  # 用于轮询的当前索引
             with open(image_path_or_pil, "rb") as image_file:
                 return base64.b64encode(image_file.read()).decode('utf-8')
         else:
+            # 如果是PIL图像对象，尽量保持原始格式和质量
             buffer = io.BytesIO()
+            # 检测原始格式，优先保持原格式
+            original_format = getattr(image_path_or_pil, 'format', None)
             if image_path_or_pil.mode == 'RGBA':
+                # 如果是RGBA模式且原格式支持透明度，优先保存为PNG
+                if original_format in ['PNG', 'WEBP'] or original_format is None:
+                    image_path_or_pil.save(buffer, format="PNG")  # PNG无损保存
+                else:
+                    # 否则转换为RGB并保存为高质量JPEG
+                    rgb_image = Image.new('RGB', image_path_or_pil.size, (255, 255, 255))
+                    rgb_image.paste(image_path_or_pil, mask=image_path_or_pil.split()[-1])
+                    rgb_image.save(buffer, format="JPEG", quality=95)
             else:
+                # 非RGBA模式，根据原格式选择保存方式
+                if original_format == 'PNG':
+                    image_path_or_pil.save(buffer, format="PNG")  # PNG无损保存
+                elif original_format in ['WEBP', 'BMP', 'TIFF']:
+                    # 其他格式转为高质量JPEG
+                    image_path_or_pil.save(buffer, format="JPEG", quality=95)
+                else:
+                    # 默认保存为高质量JPEG
+                    image_path_or_pil.save(buffer, format="JPEG", quality=95)
             image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
             return image_base64
     except Exception as e:
             in_thinking = False
             accumulated_content = ""
+            # 缓冲逻辑变量
+            buffer_content = ""  # 临时缓冲内容
+            last_yield_time = time.time()
+            chunk_count = 0
+            state_update_count = 0  # state更新计数器
+            BUFFER_INTERVAL = 0.5  # 秒 - 缓冲时间间隔，减少网络交互频率
+            BUFFER_CHUNKS = 5  # 每5个chunk强制yield - 平衡实时性和性能
+            STATE_UPDATE_INTERVAL = 3  # 每3次yield更新一次state - 减少state传输频率
+            # 优化state更新策略：减少不必要的历史数据传输
             for chunk in response:
                 # 安全地访问chunk属性
                 if chunk.choices and len(chunk.choices) > 0:
                 print(content, end='')
                 if content:
                     accumulated_content += content
+                    buffer_content += content  # 添加到缓冲
+                    chunk_count += 1
                     # 检查是否进入thinking模式
                     if "<think>" in accumulated_content and not in_thinking:
                             accumulated_content = think_parts[1]
                         else:
                             accumulated_content = ""
+                        # 立即yield thinking状态变化，这种重要状态变化总是需要更新state
+                        yield {
+                            chatbot: gr.update(items=history),
+                            state: gr.update(value=state_value)
+                        }
+                        buffer_content = ""  # 重置缓冲
+                        last_yield_time = time.time()
+                        chunk_count = 0
                         continue
                     # 检查是否退出thinking模式
                                 history[-1]["content"] = current_content + after_think_content
                         accumulated_content = ""  # 重置累积内容
+                        # 立即yield thinking完成状态，这种重要状态变化总是需要更新state
                         yield {
                             chatbot: gr.update(items=history),
                             state: gr.update(value=state_value)
                         }
+                        buffer_content = ""  # 重置缓冲
+                        last_yield_time = time.time()
+                        chunk_count = 0
                         continue
+                    # 缓冲检查：时间或chunk数达到时 yield
+                    current_time = time.time()
+                    should_yield = False
+                    if (current_time - last_yield_time >= BUFFER_INTERVAL) or (chunk_count >= BUFFER_CHUNKS):
+                        should_yield = True
+                    # 在thinking模式中，更新thinking内容
                     if in_thinking:
                         # 检查是否包含完整的thinking结束标签
                         if "</think>" not in accumulated_content:
                             history[-1]["meta"]["thinking_content"] = accumulated_content
+                            if should_yield:
+                                state_update_count += 1
+                                # 条件更新state：只在特定间隔更新
+                                should_update_state = (state_update_count % STATE_UPDATE_INTERVAL == 0)
+                                yield {
+                                    chatbot: gr.update(items=history),
+                                    state: gr.update(value=state_value) if should_update_state else gr.skip()
+                                }
+                                buffer_content = ""
+                                last_yield_time = current_time
+                                chunk_count = 0
                     else:
                         # 如果不在thinking模式中，正常添加内容到content
                         if not thought_done:
                             thought_done = True
                             if not history[-1]["content"]:  # 如果content为空才初始化
                                 history[-1]["content"] = ""
+                        # 应用缓冲内容到history
+                        if should_yield:
+                            # 将缓冲的内容添加到content中
+                            history[-1]["content"] += buffer_content
+                            # 清除"刚完成thinking"标记，因为现在在正常输出内容
+                            if history[-1]["meta"].get("just_finished_thinking"):
+                                history[-1]["meta"]["just_finished_thinking"] = False
+                            state_update_count += 1
+                            # 条件更新state：只在特定间隔更新
+                            should_update_state = (state_update_count % STATE_UPDATE_INTERVAL == 0)
+                            yield {
+                                chatbot: gr.update(items=history),
+                                state: gr.update(value=state_value) if should_update_state else gr.skip()
+                            }
+                            # 重置缓冲
+                            buffer_content = ""
+                            last_yield_time = current_time
+                            chunk_count = 0
+                        else:
+                            # 不yield，但需要更新content以保持逻辑一致性
+                            # 注意：这里不直接添加content，而是等待缓冲yield时一起添加
+                            pass
+            # 循环结束后，处理剩余的缓冲内容
+            if buffer_content:
+                if in_thinking:
+                    # 如果还在thinking模式中，更新thinking内容
+                    history[-1]["meta"]["thinking_content"] = accumulated_content
+                else:
+                    # 如果不在thinking模式中，添加剩余内容
+                    if not history[-1]["content"]:
+                        history[-1]["content"] = ""
+                    history[-1]["content"] += buffer_content
+                    # 清除"刚完成thinking"标记
+                    if history[-1]["meta"].get("just_finished_thinking"):
+                        history[-1]["meta"]["just_finished_thinking"] = False
+                # 最终yield，确保所有内容都被发送并强制更新state
                 yield {
                     chatbot: gr.update(items=history),
+                    state: gr.update(value=state_value)  # 最终总是更新state
                 }
             history[-1]["meta"]["end"] = True
                         # 使用PIL加载图片
                         image = Image.open(file_path)
+                        logger.info(f"Loaded image with size: {image.size} (原始尺寸，不进行缩放)")
+                        # 保持原始图片尺寸，不进行任何缩放处理
                         uploaded_images.append(image)
                     except Exception as img_error: