KevinHuSh
committed on
Commit
·
3a31a8a
1
Parent(s):
60101af
refine citation (#161)
Browse files
- api/apps/conversation_app.py +4 -3
- rag/app/paper.py +1 -1
- rag/nlp/search.py +16 -13
api/apps/conversation_app.py
CHANGED
|
@@ -194,7 +194,8 @@ def chat(dialog, messages, **kwargs):
|
|
| 194 |
# try to use sql if field mapping is good to go
|
| 195 |
if field_map:
|
| 196 |
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
|
| 197 |
-
|
|
|
|
| 198 |
|
| 199 |
prompt_config = dialog.prompt_config
|
| 200 |
for p in prompt_config["parameters"]:
|
|
@@ -305,7 +306,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
|
|
| 305 |
|
| 306 |
tbl, sql = get_table()
|
| 307 |
if tbl is None:
|
| 308 |
-
return None
|
| 309 |
if tbl.get("error") and tried_times <= 2:
|
| 310 |
user_promt = """
|
| 311 |
表名:{};
|
|
@@ -333,7 +334,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl):
|
|
| 333 |
chat_logger.info("GET table: {}".format(tbl))
|
| 334 |
print(tbl)
|
| 335 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
| 336 |
-
return None
|
| 337 |
|
| 338 |
docid_idx = set([ii for ii, c in enumerate(
|
| 339 |
tbl["columns"]) if c["name"] == "doc_id"])
|
|
|
|
| 194 |
# try to use sql if field mapping is good to go
|
| 195 |
if field_map:
|
| 196 |
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
|
| 197 |
+
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl)
|
| 198 |
+
if ans: return ans
|
| 199 |
|
| 200 |
prompt_config = dialog.prompt_config
|
| 201 |
for p in prompt_config["parameters"]:
|
|
|
|
| 306 |
|
| 307 |
tbl, sql = get_table()
|
| 308 |
if tbl is None:
|
| 309 |
+
return None
|
| 310 |
if tbl.get("error") and tried_times <= 2:
|
| 311 |
user_promt = """
|
| 312 |
表名:{};
|
|
|
|
| 334 |
chat_logger.info("GET table: {}".format(tbl))
|
| 335 |
print(tbl)
|
| 336 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
| 337 |
+
return None
|
| 338 |
|
| 339 |
docid_idx = set([ii for ii, c in enumerate(
|
| 340 |
tbl["columns"]) if c["name"] == "doc_id"])
|
rag/app/paper.py
CHANGED
|
@@ -120,7 +120,7 @@ class Pdf(PdfParser):
|
|
| 120 |
print(tbls)
|
| 121 |
|
| 122 |
return {
|
| 123 |
-
"title": title
|
| 124 |
"authors": " ".join(authors),
|
| 125 |
"abstract": abstr,
|
| 126 |
"sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
|
|
|
|
| 120 |
print(tbls)
|
| 121 |
|
| 122 |
return {
|
| 123 |
+
"title": title,
|
| 124 |
"authors": " ".join(authors),
|
| 125 |
"abstract": abstr,
|
| 126 |
"sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if
|
rag/nlp/search.py
CHANGED
|
@@ -246,19 +246,22 @@ class Dealer:
|
|
| 246 |
chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
|
| 247 |
for ck in chunks]
|
| 248 |
cites = {}
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
res = ""
|
| 264 |
seted = set([])
|
|
|
|
| 246 |
chunks_tks = [huqie.qie(self.qryr.rmWWW(ck)).split(" ")
|
| 247 |
for ck in chunks]
|
| 248 |
cites = {}
|
| 249 |
+
thr = 0.63
|
| 250 |
+
while len(cites.keys()) == 0 and pieces_ and chunks_tks:
|
| 251 |
+
for i, a in enumerate(pieces_):
|
| 252 |
+
sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
|
| 253 |
+
chunk_v,
|
| 254 |
+
huqie.qie(
|
| 255 |
+
self.qryr.rmWWW(pieces_[i])).split(" "),
|
| 256 |
+
chunks_tks,
|
| 257 |
+
tkweight, vtweight)
|
| 258 |
+
mx = np.max(sim) * 0.99
|
| 259 |
+
es_logger.info("{} SIM: {}".format(pieces_[i], mx))
|
| 260 |
+
if mx < thr:
|
| 261 |
+
continue
|
| 262 |
+
cites[idx[i]] = list(
|
| 263 |
+
set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
|
| 264 |
+
thr *= 0.8
|
| 265 |
|
| 266 |
res = ""
|
| 267 |
seted = set([])
|