Spaces:
Sleeping
Sleeping
Update tools.py
Browse files
tools.py
CHANGED
@@ -22,10 +22,11 @@ def _download_file_for_task(task_id: str, ext: str) -> str:
|
|
22 |
os.makedirs("hf_files", exist_ok=True)
|
23 |
local_path = os.path.join("hf_files", f"{task_id}.{ext}")
|
24 |
url = f"{DEFAULT_API_URL}/files/{task_id}"
|
25 |
-
|
26 |
try:
|
27 |
resp = requests.get(url, timeout=10)
|
28 |
if resp.status_code == 200 and resp.content:
|
|
|
29 |
with open(local_path, "wb") as f:
|
30 |
f.write(resp.content)
|
31 |
return local_path
|
@@ -56,7 +57,7 @@ def web_search_tool(state: AgentState) -> AgentState:
|
|
56 |
break
|
57 |
if attempt < max_retries - 1:
|
58 |
print(f"web_search_result: rate limit error, retrying in 10 seconds")
|
59 |
-
time.sleep(10)
|
60 |
print(f"web_search_result reached ")
|
61 |
return {
|
62 |
"web_search_query": None,
|
@@ -102,7 +103,7 @@ def ocr_image_tool(state: AgentState) -> AgentState:
|
|
102 |
text = pytesseract.image_to_string(img).strip() or "(no visible text)"
|
103 |
except Exception as e:
|
104 |
text = f"Error during OCR: {e}"
|
105 |
-
|
106 |
return {
|
107 |
"ocr_path": None,
|
108 |
"ocr_result": text
|
@@ -179,7 +180,7 @@ def parse_excel_tool(state: AgentState) -> AgentState:
|
|
179 |
# 4) Strip out separator rows and return the table block
|
180 |
clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
|
181 |
table_block = "\n".join(clean_rows).strip()
|
182 |
-
|
183 |
return {
|
184 |
"excel_path": None,
|
185 |
"excel_sheet_name": None,
|
@@ -258,7 +259,7 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
|
|
258 |
text = response.get("text", "").strip()
|
259 |
except Exception as e:
|
260 |
text = f"Error during transcription: {e}"
|
261 |
-
|
262 |
return {
|
263 |
"audio_path": None,
|
264 |
"transcript": text
|
|
|
22 |
os.makedirs("hf_files", exist_ok=True)
|
23 |
local_path = os.path.join("hf_files", f"{task_id}.{ext}")
|
24 |
url = f"{DEFAULT_API_URL}/files/{task_id}"
|
25 |
+
|
26 |
try:
|
27 |
resp = requests.get(url, timeout=10)
|
28 |
if resp.status_code == 200 and resp.content:
|
29 |
+
print(f"Downloaded file from {url} to {local_path}")
|
30 |
with open(local_path, "wb") as f:
|
31 |
f.write(resp.content)
|
32 |
return local_path
|
|
|
57 |
break
|
58 |
if attempt < max_retries - 1:
|
59 |
print(f"web_search_result: rate limit error, retrying in 10 seconds")
|
60 |
+
time.sleep(4)
|
61 |
print(f"web_search_result reached ")
|
62 |
return {
|
63 |
"web_search_query": None,
|
|
|
103 |
text = pytesseract.image_to_string(img).strip() or "(no visible text)"
|
104 |
except Exception as e:
|
105 |
text = f"Error during OCR: {e}"
|
106 |
+
print(f"OCRed as ocr_result: {text}")
|
107 |
return {
|
108 |
"ocr_path": None,
|
109 |
"ocr_result": text
|
|
|
180 |
# 4) Strip out separator rows and return the table block
|
181 |
clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
|
182 |
table_block = "\n".join(clean_rows).strip()
|
183 |
+
print(f"Parsed excel as excel_result: {table_block}")
|
184 |
return {
|
185 |
"excel_path": None,
|
186 |
"excel_sheet_name": None,
|
|
|
259 |
text = response.get("text", "").strip()
|
260 |
except Exception as e:
|
261 |
text = f"Error during transcription: {e}"
|
262 |
+
print(f"Transcripted as transcript: {text}")
|
263 |
return {
|
264 |
"audio_path": None,
|
265 |
"transcript": text
|