Duibonduil committed on
Commit
b87e166
·
verified ·
1 Parent(s): 00e6138

Upload 3 files

Browse files
examples/tools/document/actions.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+ from examples.tools.tool_action import DocumentExecuteAction
4
+ from aworld.core.tool.action_factory import ActionFactory
5
+ from aworld.core.tool.action import ExecutableAction
6
+
7
+
8
@ActionFactory.register(name=DocumentExecuteAction.DOCUMENT_ANALYSIS.value.name,
                        desc=DocumentExecuteAction.DOCUMENT_ANALYSIS.value.desc,
                        tool_name="document_analysis")
class ExecuteAction(ExecutableAction):
    """Registered action for the ``document_analysis`` tool.

    This is the tool's only action, so it is declared here solely so that it
    can be registered; no implementation is provided because the tool itself
    performs the work.
    """
examples/tools/document/document.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+
4
+ import json
5
+ import os
6
+ import base64
7
+ import tempfile
8
+ import subprocess
9
+ from typing import Any, Dict, Tuple
10
+ from urllib.parse import urlparse
11
+
12
+ from pydantic import BaseModel
13
+
14
+ from aworld.config import ToolConfig
15
+ from examples.tools.tool_action import DocumentExecuteAction
16
+ from aworld.core.common import Observation, ActionModel, ActionResult
17
+ from aworld.core.tool.base import ToolFactory, Tool
18
+ from aworld.logs.util import logger
19
+ from examples.tools.document.utils import encode_image_from_file, encode_image_from_url
20
+ from aworld.utils import import_package, import_packages
21
+ from aworld.tools.utils import build_observation
22
+
23
+
24
class InputDocument(BaseModel):
    """Pydantic model holding a single optional document path.

    NOTE(review): not referenced elsewhere in this file; presumably the
    parameter schema for the ``document_path`` action param -- confirm.
    """
    # Path of the document to analyse; ``None`` when not supplied.
    document_path: str | None = None
26
+
27
+
28
@ToolFactory.register(name="document_analysis",
                      desc="document analysis",
                      supported_action=DocumentExecuteAction,
                      conf_file_name='document_analysis_tool.yaml')
class DocumentTool(Tool):
    """Tool that loads a document and converts it into model-consumable content.

    Supported inputs (matched case-insensitively on the file extension):
    images, Excel workbooks, JSON/JSONL, XML, Word documents, PDF, audio,
    video and PowerPoint presentations.
    """

    # Extension groups used to dispatch in ``document_analysis``.
    _IMAGE_EXTS = (".jpg", ".jpeg", ".png")
    _EXCEL_EXTS = (".xls", ".xlsx")
    _JSON_EXTS = (".json", ".jsonl", ".jsonld")
    _XML_EXTS = (".xml",)
    _WORD_EXTS = (".doc", ".docx")
    _PDF_EXTS = (".pdf",)
    _AUDIO_EXTS = (".mp3", ".wav", ".wave")
    _VIDEO_EXTS = (".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv")
    _PPTX_EXTS = (".pptx",)

    def __init__(self, conf: ToolConfig, **kwargs) -> None:
        """Init document tool."""
        import_package('cv2', install_name='opencv-python')
        import_packages(['xmltodict', 'pandas', 'docx2markdown', 'PyPDF2', 'numpy'])
        super(DocumentTool, self).__init__(conf, **kwargs)
        self.cur_observation = None
        self.content = None
        self.keyframes = []
        self.init()
        self.step_finished = True

    def reset(self, *, seed: int | None = None, options: Dict[str, str] | None = None) -> Tuple[
        Observation, dict[str, Any]]:
        """Reset the tool and return an empty observation plus an empty info dict."""
        super().reset(seed=seed, options=options)

        self.close()
        self.step_finished = True
        return build_observation(observer=self.name(),
                                 ability=DocumentExecuteAction.DOCUMENT_ANALYSIS.value.name), {}

    def init(self) -> None:
        """Mark the tool as initialized."""
        self.initialized = True

    def close(self) -> None:
        """Nothing to release; the tool keeps no open resources between steps."""
        pass

    def finished(self) -> bool:
        """Return True when no step is currently in progress."""
        return self.step_finished

    def do_step(self, actions: list[ActionModel], **kwargs) -> Tuple[Observation, float, bool, bool, Dict[str, Any]]:
        """Analyse the document named by the first action.

        Args:
            actions: Only ``actions[0]`` is used; its ``params`` must contain a
                non-empty ``document_path``.
            **kwargs: Copied into the returned ``info``; ``terminated`` and
                ``truncated`` are also echoed in the returned tuple.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. Any exception
            is swallowed and reported as text in ``info["exception"]``; in that
            case the reward stays 0 and no action result is appended.
        """
        self.step_finished = False
        reward = 0.
        fail_error = ""
        observation = build_observation(observer=self.name(),
                                        ability=DocumentExecuteAction.DOCUMENT_ANALYSIS.value.name)
        info = {}
        try:
            if not actions:
                raise ValueError("actions is empty")
            action = actions[0]
            document_path = action.params.get("document_path", "")
            if not document_path:
                raise ValueError("document path invalid")
            output, keyframes, error = self.document_analysis(document_path)
            observation.content = output
            observation.action_result.append(
                ActionResult(is_done=True,
                             success=not error,
                             content=f"{output}",
                             # An empty string (not the text "None") means "no error".
                             error=str(error) if error else "",
                             keep=False))
            info['key_frame'] = f"{keyframes}"
            reward = 1.
        except Exception as e:
            fail_error = str(e)
        finally:
            self.step_finished = True
        info["exception"] = fail_error
        info.update(kwargs)
        return (observation, reward, kwargs.get("terminated", False),
                kwargs.get("truncated", False), info)

    def document_analysis(self, document_path):
        """Load ``document_path`` and convert it according to its extension.

        Args:
            document_path: Local path (or, for images, optionally a URL).

        Returns:
            ``(content, keyframes, error)``. ``content`` depends on the type
            (data URI, JSON text, parsed object, plain text or a list of
            data URIs); ``keyframes`` is only filled for videos; ``error`` is
            ``None`` on success or a message string. Unknown extensions yield
            ``([], [], None)``.

        Raises:
            Exceptions from the image, JSON, XML-read and Word branches
            propagate to the caller (``do_step`` reports them).
        """
        # Start from a clean state so results of a previous call never leak.
        self.content = []
        self.keyframes = []
        error = None
        lowered = document_path.lower()

        if lowered.endswith(self._IMAGE_EXTS):
            self._load_image(document_path)
        elif lowered.endswith(self._EXCEL_EXTS):
            error = self._load_excel(document_path)
        elif lowered.endswith(self._JSON_EXTS):
            self._load_json(document_path, line_delimited=lowered.endswith(".jsonl"))
        elif lowered.endswith(self._XML_EXTS):
            error = self._load_xml(document_path)
        elif lowered.endswith(self._WORD_EXTS):
            self._load_word(document_path)
        elif lowered.endswith(self._PDF_EXTS):
            error = self._load_pdf(document_path)
        elif lowered.endswith(self._AUDIO_EXTS):
            error = self._load_audio(document_path)
        elif lowered.endswith(self._VIDEO_EXTS):
            error = self._load_video(document_path)
        elif lowered.endswith(self._PPTX_EXTS):
            error = self._load_pptx(document_path)

        return self.content, self.keyframes, error

    def _load_image(self, document_path):
        """Store the image (local file or URL) as a base64 data URI."""
        parsed_url = urlparse(document_path)
        if parsed_url.scheme and parsed_url.netloc:
            base64_image = encode_image_from_url(document_path)
        else:
            base64_image = encode_image_from_file(document_path)
        # NOTE: the MIME label is always image/jpeg, regardless of the real format.
        self.content = f"data:image/jpeg;base64,{base64_image}"

    def _load_excel(self, document_path):
        """Store every sheet as JSON text ``{sheet: [row, ...]}``; return an error or None."""
        try:
            import pandas as pd
        except ImportError:
            return "pandas library not found. Please install pandas: pip install pandas"

        try:
            excel_data = {}
            with pd.ExcelFile(document_path) as xls:
                sheet_names = list(xls.sheet_names)
                for sheet_name in sheet_names:
                    df = pd.read_excel(xls, sheet_name=sheet_name)
                    excel_data[sheet_name] = df.to_dict(orient='records')

            # default=str: cells such as Timestamps are not JSON-serializable.
            self.content = json.dumps(excel_data, ensure_ascii=False, default=str)
            logger.info(f"Successfully processed Excel file: {document_path}")
            logger.info(f"Found {len(sheet_names)} sheets: {', '.join(map(str, sheet_names))}")
        except Exception as excel_error:
            return str(excel_error)
        return None

    def _load_json(self, document_path, line_delimited=False):
        """Store the parsed JSON; ``.jsonl`` falls back to one record per line."""
        with open(document_path, "r", encoding="utf-8") as f:
            text = f.read()
        try:
            self.content = json.loads(text)
        except json.JSONDecodeError:
            if not line_delimited:
                raise
            self.content = [json.loads(line) for line in text.splitlines() if line.strip()]

    def _load_xml(self, document_path):
        """Store the XML parsed to a dict, or the raw text if parsing fails."""
        import xmltodict

        with open(document_path, "r", encoding="utf-8") as f:
            data = f.read()
        try:
            self.content = xmltodict.parse(data)
            logger.info(f"The extracted xml data is: {self.content}")
        except Exception as e:
            logger.info(f"The raw xml data is: {data}")
            self.content = data
            return str(e)
        return None

    def _load_word(self, document_path):
        """Store the Word document converted to Markdown text."""
        from docx2markdown._docx_to_markdown import docx_to_markdown

        # Convert inside a scratch directory so nothing is left in the CWD.
        with tempfile.TemporaryDirectory() as scratch_dir:
            md_file_path = os.path.join(scratch_dir, f"{os.path.basename(document_path)}.md")
            docx_to_markdown(document_path, md_file_path)
            # NOTE(review): assumes the converter writes UTF-8 -- confirm.
            with open(md_file_path, "r", encoding="utf-8") as f:
                self.content = f.read()

    def _load_pdf(self, document_path):
        """Store the concatenated text of all pages; return an error or None."""
        try:
            from PyPDF2 import PdfReader

            with open(document_path, "rb") as f:
                reader = PdfReader(f)
                # extract_text() may yield None for pages without a text layer.
                self.content = "".join(page.extract_text() or "" for page in reader.pages)
        except Exception as pdf_error:
            return str(pdf_error)
        return None

    def _load_audio(self, document_path):
        """Store the audio file as a base64 data URI; return an error or None."""
        try:
            with open(document_path, "rb") as audio_file:
                audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
            ext = os.path.splitext(document_path)[1].lower()
            mime_type = "audio/mpeg" if ext == ".mp3" else "audio/wav"
            self.content = f"data:{mime_type};base64,{audio_base64}"
        except Exception as audio_error:
            error = str(audio_error)
            logger.error(f"Error processing audio file: {error}")
            return error
        return None

    @staticmethod
    def _extract_audio_data_uri(document_path):
        """Return the video's audio track as an MP3 data URI, or None on failure."""
        with tempfile.TemporaryDirectory() as scratch_dir:
            audio_path = os.path.join(scratch_dir, "extracted_audio.mp3")
            try:
                subprocess.run([
                    "ffmpeg", "-i", document_path, "-q:a", "0",
                    "-map", "a", audio_path, "-y"
                ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                with open(audio_path, "rb") as audio_file:
                    audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
                return f"data:audio/mpeg;base64,{audio_base64}"
            except (subprocess.SubprocessError, OSError) as e:
                # Missing ffmpeg or a video without audio must not abort the analysis.
                logger.warning(f"Failed to extract audio: {str(e)}")
                return None

    def _load_video(self, document_path):
        """Store the audio track as content and up to 10 evenly spaced keyframes."""
        try:
            import cv2
        except ImportError:
            return "Required libraries not found. Please install opencv-python: pip install opencv-python"

        try:
            audio_data_uri = self._extract_audio_data_uri(document_path)

            cap = cv2.VideoCapture(document_path)
            try:
                if not cap.isOpened():
                    raise ValueError(f"Could not open video file: {document_path}")

                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                if frame_count > 0:
                    keyframes_count = min(10, frame_count)
                    frames_interval = max(1, frame_count // keyframes_count)
                else:
                    # Some containers report no frame count: take the leading frames.
                    keyframes_count, frames_interval = 10, 1

                frame_index = 0
                while len(self.keyframes) < keyframes_count:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    if frame_index % frames_interval == 0:
                        encoded, buffer = cv2.imencode(".jpg", frame)
                        if encoded:
                            img_base64 = base64.b64encode(buffer).decode('utf-8')
                            self.keyframes.append(f"data:image/jpeg;base64,{img_base64}")
                    frame_index += 1
            finally:
                cap.release()

            self.content = audio_data_uri
            logger.info(f"Successfully processed video file: {document_path}")
            logger.info(f"Extracted {len(self.keyframes)} keyframes and audio track")
        except Exception as video_error:
            error = str(video_error)
            logger.error(f"Error processing video file: {error}")
            return error
        return None

    @staticmethod
    def _slide_canvas(presentation):
        """Return ``(width_px, height_px, px_per_emu)`` for rendering slides.

        Slides are rendered at 96 DPI (914400 EMU per inch) and shrunk
        uniformly, if needed, to fit within 4000x3000 pixels.
        """
        px_per_emu = 96 / 914400
        width_emu = presentation.slide_width
        height_emu = presentation.slide_height
        if not width_emu or not height_emu:
            return 960, 720, px_per_emu
        shrink = min(1.0, 4000 / (width_emu * px_per_emu), 3000 / (height_emu * px_per_emu))
        px_per_emu *= shrink
        return max(1, int(width_emu * px_per_emu)), max(1, int(height_emu * px_per_emu)), px_per_emu

    @staticmethod
    def _render_slide(slide, index, total_slides, size, px_per_emu, font):
        """Draw a rough preview of ``slide`` and return it as a JPEG data URI.

        Only pictures and text are drawn; shapes that fail are logged and skipped.
        """
        import io
        from PIL import Image, ImageDraw

        slide_img = Image.new('RGB', size, 'white')
        draw = ImageDraw.Draw(slide_img)
        draw.text((20, 20), f"Slide {index}/{total_slides}", fill="black", font=font)

        for j, shape in enumerate(slide.shapes, start=1):
            try:
                # Shape geometry is in EMU; convert to canvas pixels.
                left = int((shape.left or 0) * px_per_emu)
                top = int((shape.top or 0) * px_per_emu)

                if getattr(shape, 'image', None):
                    with Image.open(io.BytesIO(shape.image.blob)) as source:
                        picture = source.convert('RGB')
                    width = int((shape.width or 0) * px_per_emu)
                    height = int((shape.height or 0) * px_per_emu)
                    if width > 0 and height > 0:
                        picture = picture.resize((width, height))
                    slide_img.paste(picture, (left, top))
                elif getattr(shape, 'text', None):
                    draw.text((left, top), shape.text, fill="black", font=font)
                else:
                    logger.info(
                        f"Shape {j} in slide {index} is neither image nor text, skipping")
            except Exception as shape_error:
                logger.warning(
                    f"Failed to process shape {j} in slide {index}: {str(shape_error) or 'Unknown error'}")

        buffer = io.BytesIO()
        slide_img.save(buffer, 'JPEG')
        base64_image = base64.b64encode(buffer.getvalue()).decode('utf-8')
        return f"data:image/jpeg;base64,{base64_image}"

    def _load_pptx(self, document_path):
        """Store one preview data URI per slide in ``self.content``; return an error or None."""
        if not os.path.exists(document_path):
            return f"File does not exist: {document_path}"
        if not os.access(document_path, os.R_OK):
            return f"File is not readable: {document_path}"
        try:
            if os.path.getsize(document_path) == 0:
                return "File is empty"
        except OSError as size_error:
            logger.warning(f"Cannot get file size: {str(size_error)}")

        try:
            from pptx import Presentation
            from PIL import ImageFont
        except ImportError as import_error:
            return f"Missing required libraries: {str(import_error)}. Please install: pip install python-pptx Pillow"

        error = None
        try:
            presentation = Presentation(document_path)
            total_slides = len(presentation.slides)
            if total_slides == 0:
                return "PPTX file does not contain any slides"

            width_px, height_px, px_per_emu = self._slide_canvas(presentation)
            try:
                font = ImageFont.load_default()
            except Exception as font_error:
                logger.warning(f"Failed to load default font: {str(font_error) or 'Unknown error'}")
                font = None

            for index, slide in enumerate(presentation.slides, start=1):
                try:
                    self.content.append(
                        self._render_slide(slide, index, total_slides,
                                           (width_px, height_px), px_per_emu, font))
                except Exception as slide_render_error:
                    # One broken slide must not abort the remaining ones.
                    logger.error(
                        f"Failed to render slide {index}: {str(slide_render_error) or 'Unknown rendering error'}")
        except Exception as pptx_error:
            error = f"Failed to process PPTX file: {str(pptx_error)}"

        if self.content:
            logger.info(f"Extracted {len(self.content)} slides")
        else:
            error = error or "Could not extract any slides from PPTX file"
            logger.error(error)
        return error
examples/tools/document/utils.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+ import base64
4
+ from io import BytesIO
5
+
6
+
7
def encode_image_from_url(image_url, max_size=1024, timeout=30):
    """Download an image and return it as a base64 string.

    Images whose longest side exceeds ``max_size`` pixels are scaled down
    proportionally. The image is re-encoded in its original format (JPEG when
    the format is unknown).

    Args:
        image_url: HTTP(S) URL of the image.
        max_size: Maximum length, in pixels, of the longest side.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The base64-encoded image bytes as ``str``.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error status.
        OSError: If the payload is not an image Pillow can read or write.
    """
    from aworld.utils.import_package import import_package
    import_package("requests")

    import requests
    from PIL import Image

    response = requests.get(image_url, timeout=timeout)
    # Fail on 4xx/5xx here rather than trying to decode an error page as an image.
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))

    # Read the format now: images returned by resize() have format None.
    image_format = image.format or 'JPEG'

    longest_side = max(image.size)
    if longest_side > max_size:
        ratio = max_size / longest_side
        new_size = (max(1, int(image.size[0] * ratio)), max(1, int(image.size[1] * ratio)))
        image = image.resize(new_size, Image.LANCZOS)

    # JPEG cannot store alpha or palette modes.
    if image_format == 'JPEG' and image.mode not in ('RGB', 'L', 'CMYK'):
        image = image.convert('RGB')

    buffered = BytesIO()
    image.save(buffered, format=image_format)
    return base64.b64encode(buffered.getvalue()).decode()
28
+
29
+
30
def encode_image_from_file(image_path):
    """Read the file at ``image_path`` and return its bytes base64-encoded as ``str``."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()