Spaces:

MahatirTusher
/

PaperQuest-Research_Finder_and_Text_Companion

Sleeping

App Files Files Community

MahatirTusher committed on Apr 2

Commit

4edf23f

verified ·

1 Parent(s): 044f9a8

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -38

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import base64
 import xml.etree.ElementTree as ET
 import json
 import time
 # Set page configuration and styling
 st.set_page_config(
@@ -552,55 +553,146 @@ Format your response with markdown headings for better readability.
         return f"Failed to generate literature survey due to an error: {str(e)}"
 # Add the add_literature_survey_button function
def add_literature_survey_button(search_results_df):
    """
    Render the "Generate Literature Survey" button for a set of search results.

    When the button is pressed, the rows are passed to
    generate_literature_survey() and the returned text is shown as markdown
    together with a download button for it.

    Parameters:
    search_results_df (pandas.DataFrame): DataFrame containing search results
    """
    # Nothing to work with: tell the user and stop early.
    if search_results_df is None or search_results_df.empty:
        st.info("Run a search first to generate a literature survey.")
        return

    # Warn when no row of the "Source" column is "arXiv".
    arxiv_present = (
        "Source" in search_results_df.columns
        and "arXiv" in search_results_df["Source"].values
    )
    if not arxiv_present:
        st.warning("For best literature survey results, include arXiv in your search sources. arXiv papers typically have more comprehensive abstracts.")

    if not st.button("Generate Literature Survey"):
        return

    with st.spinner("Generating literature survey using AI... This may take a minute."):
        # One dict per row, which is the input generate_literature_survey is given.
        records = search_results_df.to_dict('records')
        generated = generate_literature_survey(records)

        # Show the result and offer it as a file.
        st.markdown("## Literature Survey")
        st.markdown(generated)
        st.download_button(
            label="Download Survey as Text",
            data=generated,
            file_name="literature_survey.md",
            mime="text/markdown",
        )
def literature_survey_page():
    """
    Render the "Literature Survey Generator" page inside a card <div>.

    Reads st.session_state.search_results_df. With results present it shows
    how many papers are available and delegates to
    add_literature_survey_button(); otherwise it shows a hint and a button
    that sets st.session_state.page to "search".
    """
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Literature Survey Generator")
    st.write("Generate comprehensive literature surveys from your search results.")

    results = st.session_state.search_results_df
    nothing_to_survey = results is None or results.empty

    if nothing_to_survey:
        st.info("Please perform a search first to gather papers for your literature survey.")
        if st.button("Go to Search Page"):
            st.session_state.page = "search"
    else:
        st.write(f"Using {len(results)} papers from your last search.")
        add_literature_survey_button(results)

    # Close the card wrapper opened at the top of the page.
    st.markdown('</div>', unsafe_allow_html=True)
 # Function to summarize text using the specified model
 def summarize_text(text):

 import xml.etree.ElementTree as ET
 import json
 import time
+from transformers import pipeline
 # Set page configuration and styling
 st.set_page_config(
         return f"Failed to generate literature survey due to an error: {str(e)}"
 # Add the add_literature_survey_button function
# Function to generate a literature survey using Hugging Face API
def generate_literature_survey(papers, api_key=None):
    """
    Generate a literature survey based on paper abstracts using HuggingFace API

    Parameters:
    papers (list): List of paper dicts. The keys read are "Title",
        "Author(s)", "Published" and "Abstract".
    api_key (str): Optional HuggingFace API key. When omitted, the
        "hf_api_key" entry of st.secrets is used.

    Returns:
    str: Generated literature survey, or a plain-text message explaining
        why no survey could be produced (missing key, no papers, no usable
        abstracts, or both the API call and the local fallback failed).
    """
    min_abstract_chars = 50   # abstracts at or below this length are ignored
    max_prompt_papers = 10    # limit the prompt to avoid token limits
    request_timeout_s = 120   # never let the UI hang on a stalled request

    # Retrieve the API key from Hugging Face Spaces Secrets or local secrets
    api_key = api_key or st.secrets.get("hf_api_key")
    if not api_key:
        st.error("No Hugging Face API key found. Please add it to your Space Secrets or .streamlit/secrets.toml.")
        return "Error: Missing API key."

    if not papers:
        return "No papers found to generate a literature survey."

    # Keep only papers whose abstract is a real string of useful length.
    # Rows loaded from CSV carry NaN (a truthy float) for empty cells, so a
    # plain truthiness test followed by len() would raise TypeError.
    papers_with_abstracts = [
        p for p in papers
        if isinstance(p.get("Abstract"), str)
        and len(p["Abstract"]) > min_abstract_chars
    ]
    if not papers_with_abstracts:
        return "Cannot generate a literature survey because none of the papers have substantial abstracts."

    # Construct the prompt for the LLM
    paper_info = [
        f"Paper {i}:\nTitle: {paper.get('Title', 'Unknown')}\nAuthors: {paper.get('Author(s)', 'Unknown')}\nYear: {paper.get('Published', 'Unknown')}\nAbstract: {paper.get('Abstract', 'No abstract available')}\n"
        for i, paper in enumerate(papers_with_abstracts[:max_prompt_papers], start=1)
    ]
    papers_text = "\n".join(paper_info)
    prompt = f"""You are an expert academic researcher. Based on the following papers and their abstracts,
write a concise literature survey that:
1. Identifies the main themes and research directions
2. Highlights methodological approaches
3. Summarizes key findings
4. Points out research gaps if evident
5. Suggests potential future research directions
Here are the papers:
{papers_text}
Please organize the survey by themes rather than by individual papers, creating connections between studies.
Format your response with markdown headings for better readability.
"""
    headers = {"Authorization": f"Bearer {api_key}"}

    # Use HuggingFace's Inference API
    try:
        API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        response = requests.post(
            API_URL,
            headers=headers,
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 2000,
                    "temperature": 0.3,
                    # Without this the endpoint echoes the whole prompt
                    # (every abstract) in front of the generated survey.
                    "return_full_text": False,
                },
            },
            timeout=request_timeout_s,
        )
        response.raise_for_status()

        # Parse the response: expected shape is [{"generated_text": ...}];
        # anything else is stringified so the caller still gets text.
        result = response.json()
        if isinstance(result, list) and result:
            first = result[0]
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"]
            return str(first)
        return str(result)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure in the UI and
        # fall through to local summarization instead of crashing the page.
        st.error(f"Error generating literature survey: {e}")
        try:
            summarizer = pipeline("summarization")
            sections = ["# Literature Survey\n\n"]
            for i, paper in enumerate(papers_with_abstracts, start=1):
                chunk_summary = summarizer(
                    paper["Abstract"], max_length=150, min_length=50, do_sample=False
                )[0]['summary_text']
                sections.append(
                    f"## Paper {i}: {paper.get('Title', 'Unknown')}\n\n{chunk_summary}\n\n"
                )
            return "".join(sections)
        except Exception as fallback_error:
            return f"Failed to generate literature survey due to an error: {str(fallback_error)}"
# Generate Literature Survey Page
def generate_literature_survey_page():
    """
    Render the page that builds a literature survey from an uploaded CSV.

    The CSV must contain the columns Title, Author(s), Abstract and
    Published. After a successful upload, a button passes the rows (one
    dict per row) to generate_literature_survey() and the returned text is
    rendered as markdown. Load and validation problems are reported with
    st.error and end the render early.
    """
    # Ordered tuple rather than a set so the error message lists the columns
    # in the same order on every run (set order varies with hash seeding).
    required_columns = ("Title", "Author(s)", "Abstract", "Published")

    st.title("📚 Generate Literature Survey")
    st.markdown(
        """
        Upload a list of research papers (with abstracts) and generate a comprehensive literature survey.
        The survey will identify themes, methodologies, key findings, and future research directions.
        """
    )

    # Input: Papers Data
    st.subheader("Upload Papers Data")
    uploaded_file = st.file_uploader("Upload a CSV file containing papers (with columns: Title, Author(s), Abstract, Published)", type=["csv"])
    if uploaded_file is None:
        st.info("Please upload a CSV file to proceed.")
        return

    try:
        # Load the uploaded CSV file
        papers_df = pd.read_csv(uploaded_file)
        # Validate required columns
        if not set(required_columns).issubset(papers_df.columns):
            st.error(f"CSV file must contain the following columns: {', '.join(required_columns)}")
            return
        # Convert DataFrame to list of dictionaries
        papers = papers_df.to_dict(orient="records")
        st.success(f"Successfully loaded {len(papers)} papers.")
    except Exception as e:
        # UI boundary: any load/parse failure is shown to the user.
        st.error(f"Error loading CSV file: {e}")
        return

    # Button to Generate Literature Survey
    if st.button("Generate Literature Survey"):
        with st.spinner("Generating literature survey..."):
            survey = generate_literature_survey(papers)
            st.success("Literature Survey Generated!")
            st.markdown(survey)


# Render the page when this file is executed directly as a script
if __name__ == "__main__":
    generate_literature_survey_page()
 # Function to summarize text using the specified model
 def summarize_text(text):