Serg4451D committed
Commit d8cecb5 · verified · 1 Parent(s): 152bf38

Update app.py
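
This commit migrates the chat from Gradio's legacy tuple-based Chatbot history to the messages format (gr.Chatbot(type="messages")) and moves the raw-output panel from gr.Box, which recent Gradio releases removed, to gr.Accordion. For orientation, a minimal sketch of the two history shapes the diff converts between; the example strings are illustrative, not taken from the app:

# Old tuple format: one [user, assistant] pair per exchange.
history_tuples = [["What is in this image?", "A cat on a sofa."]]

# New messages format required by gr.Chatbot(type="messages"):
# one dict per message, in OpenAI chat style.
history_messages = [
    {"role": "user", "content": "What is in this image?"},
    {"role": "assistant", "content": "A cat on a sofa."},
]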

Files changed (1)
  1. app.py +12 -60
app.py CHANGED
@@ -1,14 +1,5 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-"""
-Minimal chat (single send button):
-- Attachments via an icon in the input field (messenger-style).
-- Pipeline: Florence-2 (NIM API) → GPT-OSS (NVIDIA Integrate).
-- At the bottom: unprocessed raw Florence output for debugging.
-- No WebGPU/wasm.
-
-Required in Secrets: NV_API_KEY
-"""
 
 import os
 import io
@@ -94,12 +85,6 @@ def _deep_text_candidates(obj: Any) -> List[str]:
     return out
 
 def _debug_dump_from_response(resp: requests.Response) -> str:
-    """
-    Returns the rawest possible technical dump:
-    - status, content-type, length
-    - if JSON: the raw text
-    - if ZIP: the file list and raw JSON/TXT contents
-    """
     lines = []
     data = resp.content
     ct = (resp.headers.get("content-type") or "").lower()
@@ -109,7 +94,6 @@ def _debug_dump_from_response(resp: requests.Response) -> str:
     lines.append(f"content-type: {ct}")
     lines.append(f"bytes: {len(data)}")
 
-    # JSON
     if "application/json" in ct and not data.startswith(b"PK"):
         try:
             raw = resp.text
@@ -119,14 +103,12 @@ def _debug_dump_from_response(resp: requests.Response) -> str:
             lines.append(raw)
         return "\n".join(lines)
 
-    # ZIP
     if data.startswith(b"PK") or "zip" in ct or "octet-stream" in ct:
         lines.append("--- ZIP CONTENTS ---")
         try:
             with zipfile.ZipFile(io.BytesIO(data), "r") as z:
                 for name in z.namelist():
                     lines.append(f"* {name}")
-                # Raw JSON/TXT
                 for name in z.namelist():
                     low = name.lower()
                     if low.endswith(".json") or low.endswith(".txt"):
@@ -140,7 +122,6 @@ def _debug_dump_from_response(resp: requests.Response) -> str:
             lines.append(f"[zip parse error: {e}]")
         return "\n".join(lines)
 
-    # Fallback: just dump the text content
     try:
         txt = data.decode("utf-8", errors="ignore")
     except Exception:
@@ -150,13 +131,9 @@ def _debug_dump_from_response(resp: requests.Response) -> str:
     return "\n".join(lines)
 
 def _parse_vlm_text(resp: requests.Response) -> str:
-    """
-    Extract the best available text (if any).
-    """
     data = resp.content
     ct = (resp.headers.get("content-type") or "").lower()
 
-    # JSON
     if "application/json" in ct and not data.startswith(b"PK"):
         try:
             obj = resp.json()
@@ -165,11 +142,9 @@ def _parse_vlm_text(resp: requests.Response) -> str:
         except Exception:
             return ""
 
-    # ZIP → look for JSON/TXT
     if data.startswith(b"PK") or "zip" in ct or "octet-stream" in ct:
         try:
             with zipfile.ZipFile(io.BytesIO(data), "r") as z:
-                # JSON takes priority
                 for name in z.namelist():
                     if not name.lower().endswith(".json"):
                         continue
@@ -181,7 +156,6 @@ def _parse_vlm_text(resp: requests.Response) -> str:
                         return cands[0]
                 except Exception:
                     pass
-                # then TXT
                 for name in z.namelist():
                     if name.lower().endswith(".txt"):
                         try:
@@ -194,16 +168,12 @@ def _parse_vlm_text(resp: requests.Response) -> str:
         except Exception:
             return ""
 
-    # Fallback
     try:
         return data.decode("utf-8", errors="ignore").strip()
     except Exception:
         return ""
 
 def _call_florence(task_token: str, asset_id: str, text_prompt: Optional[str] = None) -> Tuple[str, str]:
-    """
-    Returns (best_text, raw_debug_dump)
-    """
     content = _vlm_content(task_token, asset_id, text_prompt)
     payload = {"messages": [{"role": "user", "content": content}]}
     headers = {
@@ -214,7 +184,6 @@ def _call_florence(task_token: str, asset_id: str, text_prompt: Optional[str] =
         "NVCF-FUNCTION-ASSET-IDS": asset_id,
     }
     resp = requests.post(NV_VLM_URL, headers=headers, json=payload, timeout=300)
-    # Raw dump for debugging, even if the status is not 200
     raw_dump = _debug_dump_from_response(resp) if resp is not None else "[no response]"
     if not resp.ok:
         return f"[VLM HTTP {resp.status_code}]", raw_dump
@@ -225,10 +194,6 @@ def _is_good(text: str) -> bool:
     return isinstance(text, str) and len(text.strip()) >= 3 and "изображений-результатов" not in text.lower()
 
 def get_caption_with_debug(image_path: str) -> Tuple[str, str, str]:
-    """
-    Try <MORE_DETAILED_CAPTION> → <DETAILED_CAPTION> → <CAPTION> → <OCR>.
-    Returns (caption, asset_id, debug_raw_all_attempts)
-    """
     asset_id = nvcf_upload_asset(image_path)
     attempts = [
         ("<MORE_DETAILED_CAPTION>", None),
@@ -271,15 +236,11 @@ def _extract_text_from_stream_chunk(chunk: Any) -> str:
 # --------------------- Chat logic ---------------------
 def respond(
     message: Dict[str, Any],
-    chat_history: List[List[str]],
+    chat_history: List[Dict[str, str]],
     last_caption: str,
     last_asset_id: str,
     last_debug: str
 ):
-    """
-    message: MultimodalTextbox -> {"text": str, "files": [<paths or dicts>]}
-    Pipeline: if an image is attached → Florence (caption + raw) → GPT-OSS.
-    """
     text = (message or {}).get("text", "") if isinstance(message, dict) else str(message or "")
     files = (message or {}).get("files", []) if isinstance(message, dict) else []
 
@@ -296,7 +257,6 @@ def respond(
 
     img_path = first_image_path(files)
 
-    # The user's visible message
     parts = []
     if text and text.strip():
         parts.append(text.strip())
@@ -305,24 +265,22 @@ def respond(
     user_visible = "\n".join(parts) if parts else "🖐️"
 
     chat_history = chat_history or []
-    chat_history.append([user_visible, ""])
-    # First UI update (clears the input field)
+    chat_history.append({"role": "user", "content": user_visible})
+    chat_history.append({"role": "assistant", "content": ""})
     yield {"text": "", "files": []}, chat_history, last_caption, last_asset_id, (last_debug or "")
 
     caption = last_caption or ""
     asset_id = last_asset_id or ""
     debug_raw = last_debug or ""
 
-    # Always: if the request contains an image, run Florence first
     if img_path:
-        chat_history[-1][1] = "🔎 Обрабатываю изображение во Florence…"
+        chat_history[-1]["content"] = "🔎 Обрабатываю изображение во Florence…"
         yield {"text": "", "files": []}, chat_history, caption, asset_id, (debug_raw or "")
         try:
             caption, asset_id, debug_raw = get_caption_with_debug(img_path)
         except Exception as e:
             caption, debug_raw = "", f"[Florence error] {e}"
 
-    # System prompt
     if caption:
         system_prompt = (
             "You are a helpful multimodal assistant. "
@@ -338,10 +296,8 @@ def respond(
         "If the user refers to an image but no caption is available, ask them to reattach the image."
     )
 
-    # User text for the model
     user_text_for_llm = text or ("Describe the attached image." if caption else "Hi")
 
-    # LLM streaming
     assistant_accum = ""
     try:
         stream = llm.chat.completions.create(
@@ -360,11 +316,9 @@ def respond(
             if not piece:
                 continue
             assistant_accum += piece
-            chat_history[-1][1] = assistant_accum
-            # Show the raw Florence output at the bottom (fixed per request)
+            chat_history[-1]["content"] = assistant_accum
             yield {"text": "", "files": []}, chat_history, caption, asset_id, (debug_raw or "")
-    except Exception as e:
-        # Non-streaming fallback
+    except Exception:
        try:
            resp = llm.chat.completions.create(
                model="openai/gpt-oss-120b",
@@ -392,10 +346,10 @@ def respond(
                 final_text = str(resp)
             else:
                 final_text = str(resp)
-            chat_history[-1][1] = final_text
+            chat_history[-1]["content"] = final_text
             yield {"text": "", "files": []}, chat_history, caption, asset_id, (debug_raw or "")
         except Exception as e2:
-            chat_history[-1][1] = f"[Ошибка LLM: {e2}]"
+            chat_history[-1]["content"] = f"[Ошибка LLM: {e2}]"
             yield {"text": "", "files": []}, chat_history, caption, asset_id, (debug_raw or "")
 
 # --------------------- Interface ---------------------
@@ -410,7 +364,7 @@ messenger_css = """
 #bottom-bar { position: sticky; bottom: 0; background: var(--body-background-fill); border-top: 1px solid rgba(0,0,0,0.06); padding: 8px; display: flex; gap: 8px; align-items: center; }
 #send { min-width: 42px; max-width: 42px; height: 42px; border-radius: 999px; }
 #msg .mm-wrap { border: 1px solid rgba(0,0,0,0.08); border-radius: 999px; }
-#raw-box .wrap>label { font-weight: 600; }
+#raw-wrap .wrap>label { font-weight: 600; }
 """
 
 theme = gr.themes.Soft(
@@ -430,7 +384,7 @@ with gr.Blocks(theme=theme, css=messenger_css, analytics_enabled=False) as demo:
     debug_state = gr.State(value="")
 
     with gr.Group(elem_id="chat-wrap"):
-        chatbot = gr.Chatbot(label="", height=520, elem_id="chat")
+        chatbot = gr.Chatbot(label="", height=520, elem_id="chat", type="messages")
 
     with gr.Row(elem_id="bottom-bar"):
         msg = gr.MultimodalTextbox(
@@ -440,16 +394,14 @@ with gr.Blocks(theme=theme, css=messenger_css, analytics_enabled=False) as demo:
     )
     send = gr.Button("➤", variant="primary", elem_id="send")
 
-    # Bottom block: raw Florence output
-    with gr.Box(elem_id="raw-box"):
+    with gr.Accordion("Raw Florence output", open=True, elem_id="raw-wrap"):
         raw_out = gr.Textbox(
-            label="Raw Florence output",
+            label="",
             value="",
             lines=14,
             show_copy_button=True
         )
 
-    # Single submission path: Enter and the button (one visible send button)
     msg.submit(
         respond,
         inputs=[msg, chatbot, caption_state, asset_state, debug_state],
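
With the messages-format history, streaming works by mutating the last assistant dict in place and re-yielding the list, which is the pattern respond() uses above. A standalone sketch of that loop, assuming an OpenAI-style streaming client; the names mirror the diff and the stream setup is elided here:

chat_history.append({"role": "user", "content": user_visible})
chat_history.append({"role": "assistant", "content": ""})  # placeholder to fill
assistant_accum = ""
for chunk in stream:  # stream = llm.chat.completions.create(..., stream=True)
    piece = chunk.choices[0].delta.content or ""
    if not piece:
        continue
    assistant_accum += piece
    chat_history[-1]["content"] = assistant_accum  # update the placeholder in place
    yield chat_history  # Gradio re-renders the Chatbot on every yield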
 