Kevin Hu
committed on
Commit
·
7b6896b
1
Parent(s):
77dc93a
refine error log while chunking (#1937)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Refactoring
- api/apps/document_app.py +3 -1
- rag/svr/task_executor.py +16 -11
api/apps/document_app.py
CHANGED
|
@@ -501,7 +501,9 @@ def upload_and_parse():
|
|
| 501 |
"callback": dummy,
|
| 502 |
"parser_config": parser_config,
|
| 503 |
"from_page": 0,
|
| 504 |
-
"to_page": 100000
|
|
|
|
|
|
|
| 505 |
}
|
| 506 |
threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs))
|
| 507 |
|
|
|
|
| 501 |
"callback": dummy,
|
| 502 |
"parser_config": parser_config,
|
| 503 |
"from_page": 0,
|
| 504 |
+
"to_page": 100000,
|
| 505 |
+
"tenant_id": kb.tenant_id,
|
| 506 |
+
"lang": kb.language
|
| 507 |
}
|
| 508 |
threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs))
|
| 509 |
|
rag/svr/task_executor.py
CHANGED
|
@@ -146,27 +146,32 @@ def build(row):
|
|
| 146 |
binary = get_minio_binary(bucket, name)
|
| 147 |
cron_logger.info(
|
| 148 |
"From minio({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
| 149 |
-
cks = chunker.chunk(row["name"], binary=binary, from_page=row["from_page"],
|
| 150 |
-
to_page=row["to_page"], lang=row["language"], callback=callback,
|
| 151 |
-
kb_id=row["kb_id"], parser_config=row["parser_config"], tenant_id=row["tenant_id"])
|
| 152 |
-
cron_logger.info(
|
| 153 |
-
"Chunkking({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
| 154 |
except TimeoutError as e:
|
| 155 |
-
callback(-1, f"Internal server error: Fetch file timeout. Could you try it again.")
|
| 156 |
cron_logger.error(
|
| 157 |
-
"
|
| 158 |
return
|
| 159 |
except Exception as e:
|
| 160 |
if re.search("(No such file|not found)", str(e)):
|
| 161 |
-
callback(-1, "Can not find file <%s>" % row["name"])
|
| 162 |
else:
|
| 163 |
-
callback(-1, f"
|
| 164 |
str(e).replace("'", ""))
|
| 165 |
traceback.print_exc()
|
|
|
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
cron_logger.error(
|
| 168 |
-
"
|
| 169 |
-
|
| 170 |
return
|
| 171 |
|
| 172 |
docs = []
|
|
|
|
| 146 |
binary = get_minio_binary(bucket, name)
|
| 147 |
cron_logger.info(
|
| 148 |
"From minio({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
except TimeoutError as e:
|
| 150 |
+
callback(-1, f"Internal server error: Fetch file from minio timeout. Could you try it again.")
|
| 151 |
cron_logger.error(
|
| 152 |
+
"Minio {}/{}: Fetch file from minio timeout.".format(row["location"], row["name"]))
|
| 153 |
return
|
| 154 |
except Exception as e:
|
| 155 |
if re.search("(No such file|not found)", str(e)):
|
| 156 |
+
callback(-1, "Can not find file <%s> from minio. Could you try it again?" % row["name"])
|
| 157 |
else:
|
| 158 |
+
callback(-1, f"Get file from minio: %s" %
|
| 159 |
str(e).replace("'", ""))
|
| 160 |
traceback.print_exc()
|
| 161 |
+
return
|
| 162 |
|
| 163 |
+
try:
|
| 164 |
+
cks = chunker.chunk(row["name"], binary=binary, from_page=row["from_page"],
|
| 165 |
+
to_page=row["to_page"], lang=row["language"], callback=callback,
|
| 166 |
+
kb_id=row["kb_id"], parser_config=row["parser_config"], tenant_id=row["tenant_id"])
|
| 167 |
+
cron_logger.info(
|
| 168 |
+
"Chunking({}) {}/{}".format(timer() - st, row["location"], row["name"]))
|
| 169 |
+
except Exception as e:
|
| 170 |
+
callback(-1, f"Internal server error while chunking: %s" %
|
| 171 |
+
str(e).replace("'", ""))
|
| 172 |
cron_logger.error(
|
| 173 |
+
"Chunking {}/{}: {}".format(row["location"], row["name"], str(e)))
|
| 174 |
+
traceback.print_exc()
|
| 175 |
return
|
| 176 |
|
| 177 |
docs = []
|