add feedback form link
app.py CHANGED
@@ -15,6 +15,7 @@ from datasets import load_dataset, load_from_disk
 from collections import Counter
 
 import yaml, json, requests, sys, os, time
+import urllib.parse
 import concurrent.futures
 
 from langchain import hub
@@ -498,13 +499,25 @@ def guess_question_type(query: str):
     return gen_client.invoke(messages).content
 
 class OverallConsensusEvaluation(BaseModel):
-    …
+    rewritten_statement: str = Field(
         ...,
-        description="The …
+        description="The query rewritten as a statement if it was initially a question"
+    )
+    consensus: Literal[
+        "Strong Agreement Between Abstracts and Query",
+        "Moderate Agreement Between Abstracts and Query",
+        "Weak Agreement Between Abstracts and Query",
+        "No Clear Agreement/Disagreement Between Abstracts and Query",
+        "Weak Disagreement Between Abstracts and Query",
+        "Moderate Disagreement Between Abstracts and Query",
+        "Strong Disagreement Between Abstracts and Query"
+    ] = Field(
+        ...,
+        description="The overall level of consensus between the rewritten statement and the abstracts"
     )
     explanation: str = Field(
         ...,
-        description="A detailed explanation of the consensus evaluation"
+        description="A detailed explanation of the consensus evaluation (maximum six sentences)"
     )
     relevance_score: float = Field(
         ...,
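The `Literal` annotation on `consensus` does real work here: Pydantic rejects any payload whose consensus label is not one of the seven exact strings, rather than silently storing free-form text. A minimal sketch of that behavior under Pydantic v2 (the trimmed model and payloads below are illustrative, not from app.py):

from typing import Literal
from pydantic import BaseModel, Field, ValidationError

class ConsensusSketch(BaseModel):
    # Stand-in for OverallConsensusEvaluation, trimmed to two fields.
    consensus: Literal[
        "Strong Agreement Between Abstracts and Query",
        "Strong Disagreement Between Abstracts and Query",
    ] = Field(..., description="Overall consensus level")
    relevance_score: float = Field(..., description="Relevance in [0, 1]")

ok = ConsensusSketch.model_validate(
    {"consensus": "Strong Agreement Between Abstracts and Query",
     "relevance_score": 0.85}
)
print(ok.consensus)

try:
    # An off-list label fails fast instead of leaking downstream.
    ConsensusSketch.model_validate(
        {"consensus": "Mostly Agrees", "relevance_score": 0.85}
    )
except ValidationError as err:
    print(err.errors()[0]["type"])  # 'literal_error'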
@@ -514,25 +527,31 @@ class OverallConsensusEvaluation(BaseModel):
     )
 
 def evaluate_overall_consensus(query: str, abstracts: List[str]) -> OverallConsensusEvaluation:
-    """
-    Evaluates the overall consensus of the abstracts in relation to the query in a single LLM call.
-    """
     prompt = f"""
     Query: {query}
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
+    You will be provided with {len(abstracts)} scientific abstracts. Your task is to do the following:
+    1. If the provided query is a question, rewrite it as a statement. This statement does not have to be true. Output this as 'Rewritten Statement:'.
+    2. Evaluate the overall consensus between the rewritten statement and the abstracts using one of the following levels:
+    - Strong Agreement Between Abstracts and Query
+    - Moderate Agreement Between Abstracts and Query
+    - Weak Agreement Between Abstracts and Query
+    - No Clear Agreement/Disagreement Between Abstracts and Query
+    - Weak Disagreement Between Abstracts and Query
+    - Moderate Disagreement Between Abstracts and Query
+    - Strong Disagreement Between Abstracts and Query
+    Output this as 'Consensus:'
+    3. Provide a detailed explanation of your consensus evaluation in maximum six sentences. Output this as 'Explanation:'
+    4. Assign a relevance score as a float between 0 to 1, where:
+    - 1.0: Perfect match in content and quality
+    - 0.8-0.9: Excellent, with minor differences
+    - 0.6-0.7: Good, captures main points but misses some details
+    - 0.4-0.5: Fair, partially relevant but significant gaps
+    - 0.2-0.3: Poor, major inaccuracies or omissions
+    - 0.0-0.1: Completely irrelevant or incorrect
+    Output this as 'Relevance Score:'
     Here are the abstracts:
-    …
     {' '.join([f"Abstract {i+1}: {abstract}" for i, abstract in enumerate(abstracts)])}
-    …
-    Provide your evaluation in a structured format.
+    Provide your evaluation in the structured format described above.
     """
 
     response = st.session_state.consensus_client.chat.completions.create(
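The prompt enumerates exactly the seven labels the `Literal` field accepts, so the model's 'Consensus:' line maps onto the schema without fuzzy matching. The remaining arguments of the `create(...)` call sit outside this hunk; as a sketch, an OpenAI-style client can bind the Pydantic model to the completion directly (this assumes the `openai` package's structured-output helper and a placeholder model name, not necessarily what app.py does):

from openai import OpenAI

# Sketch only: `prompt` and OverallConsensusEvaluation as defined above.
client = OpenAI()  # reads OPENAI_API_KEY from the environment

completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",  # placeholder
    messages=[{"role": "user", "content": prompt}],
    response_format=OverallConsensusEvaluation,
)
evaluation = completion.choices[0].message.parsed  # validated instance
print(evaluation.consensus, evaluation.relevance_score)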
@@ -574,7 +593,7 @@ def make_embedding_plot(papers_df, consensus_answer):
     alphas = np.ones((len(plt_indices),)) * 0.9
     alphas[outlier_flag] = 0.5
 
-    fig = plt.figure(figsize=(9*…
+    fig = plt.figure(figsize=(9*1.8,12*1.8))
     plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
 
     clkws = np.load('kw_tags.npz')
@@ -669,16 +688,29 @@ if st.session_state.get('runpfdr'):
     }
 
     @st.fragment()
-    def download_op(data):
+    def download_op(data, prefill_data):
         json_string = json.dumps(data)
         st.download_button(
             label='Download output',
             file_name="pathfinder_data.json",
             mime="application/json",
-            data=json_string,
+            data=json_string,
+            use_container_width=True)
+
+        encoded_data = urllib.parse.urlencode(prefill_data)
+        prefilled_url = f"{form_url}?{encoded_data}"
+        st.link_button('Feedback: Help make pathfinder better!',
+                       prefilled_url,
+                       use_container_width=True)
+
+
+    form_url = "https://docs.google.com/forms/d/e/1FAIpQLScaPKbW1fiwksX-UewovCLwx6EArl7bxbVmdWMDBs_0Ct3i6g/viewform"
+    prefill_data = {
+        "entry.1224637570": query,  # Replace with your actual field ID
+        "entry.872565685": answer_text,  # Replace with your actual field ID
+    }
 
-    …
-    download_op(session_vars)
+    download_op(session_vars, prefill_data)
 
 else:
     st.info("Use the sidebar to tweak the search parameters to get better results.")
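The feedback link relies on Google Forms' pre-fill mechanism: each form field has an `entry.<id>` parameter name (obtainable from the form's "Get pre-filled link" tool), and `urllib.parse.urlencode` percent-escapes the query and answer text so they survive the trip through the URL. A standalone sketch with a placeholder form ID and made-up entry IDs:

import urllib.parse

# Placeholder form URL and entry IDs; real values come from the form's
# "Get pre-filled link" tool and are specific to each form.
form_url = "https://docs.google.com/forms/d/e/FORM_ID/viewform"
prefill_data = {
    "entry.1111111111": "What is the Hubble tension?",       # query field
    "entry.2222222222": "The Hubble tension refers to ...",  # answer field
}

# urlencode handles spaces, quotes, and '&' inside the values, so the
# link opens the form with both fields already filled in.
prefilled_url = f"{form_url}?{urllib.parse.urlencode(prefill_data)}"
print(prefilled_url)

Wrapping `download_op` in `@st.fragment()` keeps the buttons in their own rerun scope, so a download click reruns just the fragment rather than the whole search pipeline (`st.fragment` is stable in recent Streamlit releases; older versions shipped it as `st.experimental_fragment`).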