Kevin Hu
committed on
Commit
·
b07ff91
1
Parent(s):
d1bf860
Accelerate titles' embeddings. (#4492)
Browse files

### What problem does this PR solve?
### Type of change
- [x] Performance Improvement
deepdoc/vision/layout_recognizer.py
CHANGED
|
@@ -78,7 +78,7 @@ class LayoutRecognizer(Recognizer):
|
|
| 78 |
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
|
| 79 |
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
|
| 80 |
"page_number": pn,
|
| 81 |
-
} for b in lts if float(b["score"]) >= 0.
|
| 82 |
lts = self.sort_Y_firstly(lts, np.mean(
|
| 83 |
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
|
| 84 |
lts = self.layouts_cleanup(bxs, lts)
|
|
|
|
| 78 |
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
|
| 79 |
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
|
| 80 |
"page_number": pn,
|
| 81 |
+
} for b in lts if float(b["score"]) >= 0.4 or b["type"] not in self.garbage_layouts]
|
| 82 |
lts = self.sort_Y_firstly(lts, np.mean(
|
| 83 |
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
|
| 84 |
lts = self.layouts_cleanup(bxs, lts)
|
rag/svr/task_executor.py
CHANGED
|
@@ -354,16 +354,9 @@ def embedding(docs, mdl, parser_config=None, callback=None):
|
|
| 354 |
|
| 355 |
tk_count = 0
|
| 356 |
if len(tts) == len(cnts):
|
| 357 |
-
|
| 358 |
-
for
|
| 359 |
-
|
| 360 |
-
if len(tts_) == 0:
|
| 361 |
-
tts_ = vts
|
| 362 |
-
else:
|
| 363 |
-
tts_ = np.concatenate((tts_, vts), axis=0)
|
| 364 |
-
tk_count += c
|
| 365 |
-
callback(prog=0.6 + 0.1 * (i + 1) / len(tts), msg="")
|
| 366 |
-
tts = tts_
|
| 367 |
|
| 368 |
cnts_ = np.array([])
|
| 369 |
for i in range(0, len(cnts), batch_size):
|
|
|
|
| 354 |
|
| 355 |
tk_count = 0
|
| 356 |
if len(tts) == len(cnts):
|
| 357 |
+
vts, c = mdl.encode(tts[0: 1])
|
| 358 |
+
tts = np.concatenate([vts for _ in range(len(tts))], axis=0)
|
| 359 |
+
tk_count += c
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
|
| 361 |
cnts_ = np.array([])
|
| 362 |
for i in range(0, len(cnts), batch_size):
|