Spaces:
Running
Running
Merge branch 'main' into add-pdf-viewer
Browse files
document_qa/grobid_processors.py
CHANGED
|
@@ -467,6 +467,11 @@ class GrobidMaterialsProcessor(BaseProcessor):
|
|
| 467 |
if status != 200:
|
| 468 |
result = []
|
| 469 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
compositions = []
|
| 471 |
for material in result:
|
| 472 |
if 'resolvedFormulas' in material:
|
|
@@ -476,7 +481,8 @@ class GrobidMaterialsProcessor(BaseProcessor):
|
|
| 476 |
elif 'formula' in material:
|
| 477 |
if 'formulaComposition' in material['formula']:
|
| 478 |
compositions.append(material['formula']['formulaComposition'])
|
| 479 |
-
|
|
|
|
| 480 |
return compositions
|
| 481 |
|
| 482 |
@staticmethod
|
|
@@ -514,6 +520,12 @@ class GrobidAggregationProcessor(GrobidProcessor, GrobidQuantitiesProcessor, Gro
|
|
| 514 |
entities = self.prune_overlapping_annotations(all_entities)
|
| 515 |
return entities
|
| 516 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
@staticmethod
|
| 518 |
def prune_overlapping_annotations(entities: list) -> list:
|
| 519 |
# Sorting by offsets
|
|
|
|
| 467 |
if status != 200:
|
| 468 |
result = []
|
| 469 |
|
| 470 |
+
compositions = self.output_info(result)
|
| 471 |
+
|
| 472 |
+
return compositions
|
| 473 |
+
|
| 474 |
+
def output_info(self, result):
|
| 475 |
compositions = []
|
| 476 |
for material in result:
|
| 477 |
if 'resolvedFormulas' in material:
|
|
|
|
| 481 |
elif 'formula' in material:
|
| 482 |
if 'formulaComposition' in material['formula']:
|
| 483 |
compositions.append(material['formula']['formulaComposition'])
|
| 484 |
+
if 'name' in material:
|
| 485 |
+
compositions.append(material['name'])
|
| 486 |
return compositions
|
| 487 |
|
| 488 |
@staticmethod
|
|
|
|
| 520 |
entities = self.prune_overlapping_annotations(all_entities)
|
| 521 |
return entities
|
| 522 |
|
| 523 |
+
def extract_quantities(self, text):
|
| 524 |
+
return self.gqp.extract_quantities(text)
|
| 525 |
+
|
| 526 |
+
def extract_materials(self, text):
|
| 527 |
+
return self.gmp.extract_materials(text)
|
| 528 |
+
|
| 529 |
@staticmethod
|
| 530 |
def prune_overlapping_annotations(entities: list) -> list:
|
| 531 |
# Sorting by offsets
|