add feedback form link
app.py CHANGED
@@ -15,6 +15,7 @@ from datasets import load_dataset, load_from_disk
 from collections import Counter
 
 import yaml, json, requests, sys, os, time
+import urllib.parse
 import concurrent.futures
 
 from langchain import hub
@@ -498,13 +499,25 @@ def guess_question_type(query: str):
     return gen_client.invoke(messages).content
 
 class OverallConsensusEvaluation(BaseModel):
-    …
+    rewritten_statement: str = Field(
         ...,
-        description="The …
+        description="The query rewritten as a statement if it was initially a question"
+    )
+    consensus: Literal[
+        "Strong Agreement Between Abstracts and Query",
+        "Moderate Agreement Between Abstracts and Query",
+        "Weak Agreement Between Abstracts and Query",
+        "No Clear Agreement/Disagreement Between Abstracts and Query",
+        "Weak Disagreement Between Abstracts and Query",
+        "Moderate Disagreement Between Abstracts and Query",
+        "Strong Disagreement Between Abstracts and Query"
+    ] = Field(
+        ...,
+        description="The overall level of consensus between the rewritten statement and the abstracts"
     )
     explanation: str = Field(
         ...,
-        description="A detailed explanation of the consensus evaluation"
+        description="A detailed explanation of the consensus evaluation (maximum six sentences)"
     )
     relevance_score: float = Field(
         ...,
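The `Literal` annotation on `consensus` does real work here: Pydantic rejects any payload whose consensus label is not one of the seven exact strings, rather than silently storing free-form text. A minimal sketch of that behavior under Pydantic v2 (the trimmed model and payloads below are illustrative, not from app.py):

from typing import Literal
from pydantic import BaseModel, Field, ValidationError

class ConsensusSketch(BaseModel):
    # Stand-in for OverallConsensusEvaluation, trimmed to two fields.
    consensus: Literal[
        "Strong Agreement Between Abstracts and Query",
        "Strong Disagreement Between Abstracts and Query",
    ] = Field(..., description="Overall consensus level")
    relevance_score: float = Field(..., description="Relevance in [0, 1]")

ok = ConsensusSketch.model_validate(
    {"consensus": "Strong Agreement Between Abstracts and Query",
     "relevance_score": 0.85}
)
print(ok.consensus)

try:
    # An off-list label fails fast instead of leaking downstream.
    ConsensusSketch.model_validate(
        {"consensus": "Mostly Agrees", "relevance_score": 0.85}
    )
except ValidationError as err:
    print(err.errors()[0]["type"])  # 'literal_error'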
@@ -514,25 +527,31 @@ class OverallConsensusEvaluation(BaseModel):
     )
 
 def evaluate_overall_consensus(query: str, abstracts: List[str]) -> OverallConsensusEvaluation:
-    """
-    Evaluates the overall consensus of the abstracts in relation to the query in a single LLM call.
-    """
     prompt = f"""
     Query: {query}
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
+    You will be provided with {len(abstracts)} scientific abstracts. Your task is to do the following:
+    1. If the provided query is a question, rewrite it as a statement. This statement does not have to be true. Output this as 'Rewritten Statement:'.
+    2. Evaluate the overall consensus between the rewritten statement and the abstracts using one of the following levels:
+    - Strong Agreement Between Abstracts and Query
+    - Moderate Agreement Between Abstracts and Query
+    - Weak Agreement Between Abstracts and Query
+    - No Clear Agreement/Disagreement Between Abstracts and Query
+    - Weak Disagreement Between Abstracts and Query
+    - Moderate Disagreement Between Abstracts and Query
+    - Strong Disagreement Between Abstracts and Query
+    Output this as 'Consensus:'
+    3. Provide a detailed explanation of your consensus evaluation in maximum six sentences. Output this as 'Explanation:'
+    4. Assign a relevance score as a float between 0 to 1, where:
+    - 1.0: Perfect match in content and quality
+    - 0.8-0.9: Excellent, with minor differences
+    - 0.6-0.7: Good, captures main points but misses some details
+    - 0.4-0.5: Fair, partially relevant but significant gaps
+    - 0.2-0.3: Poor, major inaccuracies or omissions
+    - 0.0-0.1: Completely irrelevant or incorrect
+    Output this as 'Relevance Score:'
     Here are the abstracts:
-    …
     {' '.join([f"Abstract {i+1}: {abstract}" for i, abstract in enumerate(abstracts)])}
-    …
-    Provide your evaluation in a structured format.
+    Provide your evaluation in the structured format described above.
     """
 
     response = st.session_state.consensus_client.chat.completions.create(
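The prompt enumerates exactly the seven labels the `Literal` field accepts, so the model's 'Consensus:' line maps onto the schema without fuzzy matching. The remaining arguments of the `create(...)` call sit outside this hunk; as a sketch, an OpenAI-style client can bind the Pydantic model to the completion directly (this assumes the `openai` package's structured-output helper and a placeholder model name, not necessarily what app.py does):

from openai import OpenAI

# Sketch only: `prompt` and OverallConsensusEvaluation as defined above.
client = OpenAI()  # reads OPENAI_API_KEY from the environment

completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",  # placeholder
    messages=[{"role": "user", "content": prompt}],
    response_format=OverallConsensusEvaluation,
)
evaluation = completion.choices[0].message.parsed  # validated instance
print(evaluation.consensus, evaluation.relevance_score)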
@@ -574,7 +593,7 @@ def make_embedding_plot(papers_df, consensus_answer):
     alphas = np.ones((len(plt_indices),)) * 0.9
     alphas[outlier_flag] = 0.5
 
-    fig = plt.figure(figsize=(9*…
+    fig = plt.figure(figsize=(9*1.8,12*1.8))
     plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
 
     clkws = np.load('kw_tags.npz')
@@ -669,16 +688,29 @@ if st.session_state.get('runpfdr'):
     }
 
     @st.fragment()
-    def download_op(data):
+    def download_op(data, prefill_data):
         json_string = json.dumps(data)
         st.download_button(
             label='Download output',
             file_name="pathfinder_data.json",
             mime="application/json",
-            data=json_string,
+            data=json_string,
+            use_container_width=True)
+
+        encoded_data = urllib.parse.urlencode(prefill_data)
+        prefilled_url = f"{form_url}?{encoded_data}"
+        st.link_button('Feedback: Help make pathfinder better!',
+                       prefilled_url,
+                       use_container_width=True)
+
+
+    form_url = "https://docs.google.com/forms/d/e/1FAIpQLScaPKbW1fiwksX-UewovCLwx6EArl7bxbVmdWMDBs_0Ct3i6g/viewform"
+    prefill_data = {
+        "entry.1224637570": query,  # Replace with your actual field ID
+        "entry.872565685": answer_text,  # Replace with your actual field ID
+    }
 
-    …
-    download_op(session_vars)
+    download_op(session_vars, prefill_data)
 
 else:
     st.info("Use the sidebar to tweak the search parameters to get better results.")
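The feedback link relies on Google Forms' pre-fill mechanism: each form field has an `entry.<id>` parameter name (obtainable from the form's "Get pre-filled link" tool), and `urllib.parse.urlencode` percent-escapes the query and answer text so they survive the trip through the URL. A standalone sketch with a placeholder form ID and made-up entry IDs:

import urllib.parse

# Placeholder form URL and entry IDs; real values come from the form's
# "Get pre-filled link" tool and are specific to each form.
form_url = "https://docs.google.com/forms/d/e/FORM_ID/viewform"
prefill_data = {
    "entry.1111111111": "What is the Hubble tension?",       # query field
    "entry.2222222222": "The Hubble tension refers to ...",  # answer field
}

# urlencode handles spaces, quotes, and '&' inside the values, so the
# link opens the form with both fields already filled in.
prefilled_url = f"{form_url}?{urllib.parse.urlencode(prefill_data)}"
print(prefilled_url)

Wrapping `download_op` in `@st.fragment()` keeps the buttons in their own rerun scope, so a download click reruns just the fragment rather than the whole search pipeline (`st.fragment` is stable in recent Streamlit releases; older versions shipped it as `st.experimental_fragment`).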