Kevin Hu
committed on
Commit
·
d453e49
1
Parent(s):
6c993fc
Make infinity adapt (#4635)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- agent/templates/research_report.json +0 -0
- conf/infinity_mapping.json +1 -1
- graphrag/utils.py +1 -1
- rag/utils/infinity_conn.py +11 -4
agent/templates/research_report.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
conf/infinity_mapping.json
CHANGED
|
@@ -30,7 +30,7 @@
|
|
| 30 |
"knowledge_graph_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
| 31 |
"entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
| 32 |
"pagerank_fea": {"type": "integer", "default": 0},
|
| 33 |
-
"tag_feas": {"type": "
|
| 34 |
|
| 35 |
"important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
| 36 |
"from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
|
|
|
| 30 |
"knowledge_graph_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
| 31 |
"entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
| 32 |
"pagerank_fea": {"type": "integer", "default": 0},
|
| 33 |
+
"tag_feas": {"type": "varchar", "default": ""},
|
| 34 |
|
| 35 |
"important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
| 36 |
"from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
graphrag/utils.py
CHANGED
|
@@ -484,7 +484,7 @@ def update_nodes_pagerank_nhop_neighbour(tenant_id, kb_id, graph, n_hop):
|
|
| 484 |
chunk,
|
| 485 |
search.index_name(tenant_id), kb_id)
|
| 486 |
else:
|
| 487 |
-
settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id))
|
| 488 |
|
| 489 |
|
| 490 |
def get_entity_type2sampels(idxnms, kb_ids: list):
|
|
|
|
| 484 |
chunk,
|
| 485 |
search.index_name(tenant_id), kb_id)
|
| 486 |
else:
|
| 487 |
+
settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id), kb_id)
|
| 488 |
|
| 489 |
|
| 490 |
def get_entity_type2sampels(idxnms, kb_ids: list):
|
rag/utils/infinity_conn.py
CHANGED
|
@@ -299,7 +299,7 @@ class InfinityConnection(DocStoreConnection):
|
|
| 299 |
matchExpr.extra_options[k] = str(v)
|
| 300 |
logger.debug(f"INFINITY search MatchTextExpr: {json.dumps(matchExpr.__dict__)}")
|
| 301 |
elif isinstance(matchExpr, MatchDenseExpr):
|
| 302 |
-
if filter_cond and "filter" not in matchExpr.extra_options:
|
| 303 |
matchExpr.extra_options.update({"filter": filter_fulltext})
|
| 304 |
for k, v in matchExpr.extra_options.items():
|
| 305 |
if not isinstance(v, str):
|
|
@@ -424,9 +424,11 @@ class InfinityConnection(DocStoreConnection):
|
|
| 424 |
assert "_id" not in d
|
| 425 |
assert "id" in d
|
| 426 |
for k, v in d.items():
|
| 427 |
-
if k in ["important_kwd", "question_kwd", "entities_kwd"]:
|
| 428 |
assert isinstance(v, list)
|
| 429 |
d[k] = "###".join(v)
|
|
|
|
|
|
|
| 430 |
elif k == 'kb_id':
|
| 431 |
if isinstance(d[k], list):
|
| 432 |
d[k] = d[k][0] # since d[k] is a list, but we need a str
|
|
@@ -462,7 +464,12 @@ class InfinityConnection(DocStoreConnection):
|
|
| 462 |
del condition["exist"]
|
| 463 |
filter = equivalent_condition_to_str(condition)
|
| 464 |
for k, v in list(newValue.items()):
|
| 465 |
-
if k
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
newValue[k] = " ".join(v)
|
| 467 |
elif k == 'kb_id':
|
| 468 |
if isinstance(newValue[k], list):
|
|
@@ -531,7 +538,7 @@ class InfinityConnection(DocStoreConnection):
|
|
| 531 |
v = res[fieldnm][i]
|
| 532 |
if isinstance(v, Series):
|
| 533 |
v = list(v)
|
| 534 |
-
elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd"]:
|
| 535 |
assert isinstance(v, str)
|
| 536 |
v = [kwd for kwd in v.split("###") if kwd]
|
| 537 |
elif fieldnm == "position_int":
|
|
|
|
| 299 |
matchExpr.extra_options[k] = str(v)
|
| 300 |
logger.debug(f"INFINITY search MatchTextExpr: {json.dumps(matchExpr.__dict__)}")
|
| 301 |
elif isinstance(matchExpr, MatchDenseExpr):
|
| 302 |
+
if filter_fulltext and filter_cond and "filter" not in matchExpr.extra_options:
|
| 303 |
matchExpr.extra_options.update({"filter": filter_fulltext})
|
| 304 |
for k, v in matchExpr.extra_options.items():
|
| 305 |
if not isinstance(v, str):
|
|
|
|
| 424 |
assert "_id" not in d
|
| 425 |
assert "id" in d
|
| 426 |
for k, v in d.items():
|
| 427 |
+
if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
|
| 428 |
assert isinstance(v, list)
|
| 429 |
d[k] = "###".join(v)
|
| 430 |
+
elif re.search(r"_feas$", k):
|
| 431 |
+
d[k] = json.dumps(v)
|
| 432 |
elif k == 'kb_id':
|
| 433 |
if isinstance(d[k], list):
|
| 434 |
d[k] = d[k][0] # since d[k] is a list, but we need a str
|
|
|
|
| 464 |
del condition["exist"]
|
| 465 |
filter = equivalent_condition_to_str(condition)
|
| 466 |
for k, v in list(newValue.items()):
|
| 467 |
+
if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
|
| 468 |
+
assert isinstance(v, list)
|
| 469 |
+
newValue[k] = "###".join(v)
|
| 470 |
+
elif re.search(r"_feas$", k):
|
| 471 |
+
newValue[k] = json.dumps(v)
|
| 472 |
+
elif k.endswith("_kwd") and isinstance(v, list):
|
| 473 |
newValue[k] = " ".join(v)
|
| 474 |
elif k == 'kb_id':
|
| 475 |
if isinstance(newValue[k], list):
|
|
|
|
| 538 |
v = res[fieldnm][i]
|
| 539 |
if isinstance(v, Series):
|
| 540 |
v = list(v)
|
| 541 |
+
elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
|
| 542 |
assert isinstance(v, str)
|
| 543 |
v = [kwd for kwd in v.split("###") if kwd]
|
| 544 |
elif fieldnm == "position_int":
|