MahatirTusher committed on
Commit
4edf23f
·
verified ·
1 Parent(s): 044f9a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -38
app.py CHANGED
@@ -9,6 +9,7 @@ import base64
9
  import xml.etree.ElementTree as ET
10
  import json
11
  import time
 
12
 
13
  # Set page configuration and styling
14
  st.set_page_config(
@@ -552,55 +553,146 @@ Format your response with markdown headings for better readability.
552
  return f"Failed to generate literature survey due to an error: {str(e)}"
553
 
554
  # Add the add_literature_survey_button function
555
- def add_literature_survey_button(search_results_df):
 
 
556
  """
557
- Add a button to generate a literature survey based on search results
558
 
559
  Parameters:
560
- search_results_df (pandas.DataFrame): DataFrame containing search results
 
 
 
 
561
  """
562
- if search_results_df is not None and not search_results_df.empty:
563
- # Check if arXiv results are included
564
- has_arxiv = "Source" in search_results_df.columns and "arXiv" in search_results_df["Source"].values
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
 
566
- if not has_arxiv:
567
- st.warning("For best literature survey results, include arXiv in your search sources. arXiv papers typically have more comprehensive abstracts.")
 
 
 
 
 
 
 
568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  if st.button("Generate Literature Survey"):
570
- with st.spinner("Generating literature survey using AI... This may take a minute."):
571
- # Convert DataFrame to list of dictionaries
572
- papers = search_results_df.to_dict('records')
573
-
574
- # Generate the survey
575
  survey = generate_literature_survey(papers)
576
-
577
- # Display the survey with proper markdown rendering
578
- st.markdown("## Literature Survey")
579
  st.markdown(survey)
580
-
581
- # Add a download button for the survey
582
- st.download_button(
583
- label="Download Survey as Text",
584
- data=survey,
585
- file_name="literature_survey.md",
586
- mime="text/markdown"
587
- )
588
- else:
589
- st.info("Run a search first to generate a literature survey.")
590
 
591
- def literature_survey_page():
592
- st.markdown('<div class="card">', unsafe_allow_html=True)
593
- st.title("Literature Survey Generator")
594
- st.write("Generate comprehensive literature surveys from your search results.")
595
-
596
- if st.session_state.search_results_df is not None and not st.session_state.search_results_df.empty:
597
- st.write(f"Using {len(st.session_state.search_results_df)} papers from your last search.")
598
- add_literature_survey_button(st.session_state.search_results_df)
599
  else:
600
- st.info("Please perform a search first to gather papers for your literature survey.")
601
- if st.button("Go to Search Page"):
602
- st.session_state.page = "search"
603
- st.markdown('</div>', unsafe_allow_html=True)
 
604
 
605
  # Function to summarize text using the specified model
606
  def summarize_text(text):
 
9
  import xml.etree.ElementTree as ET
10
  import json
11
  import time
12
+ from transformers import pipeline
13
 
14
  # Set page configuration and styling
15
  st.set_page_config(
 
553
  return f"Failed to generate literature survey due to an error: {str(e)}"
554
 
555
  # Add the add_literature_survey_button function
556
+
557
+ # Function to generate a literature survey using Hugging Face API
558
def generate_literature_survey(papers, api_key=None, timeout=120):
    """
    Generate a literature survey from paper abstracts using the Hugging Face Inference API.

    The hosted model is tried first. If that request fails for any reason, the
    function falls back to a local ``transformers`` summarization pipeline and
    returns one short summary per paper instead of a thematic survey.

    Parameters:
    papers (list): List of dicts describing papers. The keys read here are
        "Title", "Author(s)", "Published" and "Abstract".
    api_key (str): Optional Hugging Face API key. When omitted, the
        "hf_api_key" entry of ``st.secrets`` is used.
    timeout (float): Seconds to wait for the Inference API before giving up
        and switching to the local fallback.

    Returns:
    str: The generated survey as markdown, or a human-readable message
        explaining why no survey could be produced.
    """
    # Retrieve the API key from Hugging Face Spaces Secrets or local secrets
    api_key = api_key or st.secrets.get("hf_api_key")

    if not api_key:
        st.error("No Hugging Face API key found. Please add it to your Space Secrets or .streamlit/secrets.toml.")
        return "Error: Missing API key."

    if not papers:
        return "No papers found to generate a literature survey."

    papers_with_abstracts = [p for p in papers if _has_substantial_abstract(p)]
    if not papers_with_abstracts:
        return "Cannot generate a literature survey because none of the papers have substantial abstracts."

    # Limit to 10 papers to avoid token limits
    prompt = _build_survey_prompt(papers_with_abstracts[:10])
    headers = {"Authorization": f"Bearer {api_key}"}

    try:
        API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        response = requests.post(
            API_URL,
            headers=headers,
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 2000,
                    "temperature": 0.3,
                    # Ask for the completion only; by default the API echoes
                    # the prompt in front of the generated text.
                    "return_full_text": False,
                },
            },
            # Without a timeout a stalled connection would block the app forever.
            timeout=timeout,
        )
        response.raise_for_status()
        return _extract_generated_text(response.json(), prompt)
    except Exception as e:
        # Deliberately broad: any failure of the hosted model (network, HTTP
        # status, malformed JSON) should trigger the local fallback.
        st.error(f"Error generating literature survey: {e}")
        try:
            summarizer = pipeline("summarization")
            sections = ["# Literature Survey\n\n"]
            for i, paper in enumerate(papers_with_abstracts):
                chunk_summary = summarizer(
                    paper["Abstract"], max_length=150, min_length=50, do_sample=False
                )[0]['summary_text']
                sections.append(
                    f"## Paper {i+1}: {paper.get('Title', 'Unknown')}\n\n{chunk_summary}\n\n"
                )
            return "".join(sections)
        except Exception as fallback_error:
            return f"Failed to generate literature survey due to an error: {str(fallback_error)}"


def _has_substantial_abstract(paper):
    """Return True when the paper's "Abstract" is a string longer than 50 characters."""
    abstract = paper.get("Abstract")
    # The isinstance check rejects NaN (a truthy float produced by pandas for
    # empty CSV cells), which would otherwise make len() raise TypeError.
    return isinstance(abstract, str) and len(abstract) > 50


def _build_survey_prompt(selected_papers):
    """Build the instruction prompt that lists every selected paper with its metadata."""
    paper_info = [
        f"Paper {i+1}:\nTitle: {paper.get('Title', 'Unknown')}\nAuthors: {paper.get('Author(s)', 'Unknown')}\nYear: {paper.get('Published', 'Unknown')}\nAbstract: {paper.get('Abstract', 'No abstract available')}\n"
        for i, paper in enumerate(selected_papers)
    ]
    papers_text = "\n".join(paper_info)

    return f"""You are an expert academic researcher. Based on the following papers and their abstracts,
write a concise literature survey that:
1. Identifies the main themes and research directions
2. Highlights methodological approaches
3. Summarizes key findings
4. Points out research gaps if evident
5. Suggests potential future research directions

Here are the papers:

{papers_text}

Please organize the survey by themes rather than by individual papers, creating connections between studies.
Format your response with markdown headings for better readability.
"""


def _extract_generated_text(result, prompt):
    """Pull the generated text out of the decoded API response and drop an echoed prompt."""
    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict) and "generated_text" in first:
            survey_text = str(first["generated_text"])
        else:
            survey_text = str(first)
    else:
        survey_text = str(result)

    # Defensive: some backends ignore return_full_text and still prepend the prompt.
    if survey_text.startswith(prompt):
        survey_text = survey_text[len(prompt):].lstrip()
    return survey_text
650
+
651
+ # Generate Literature Survey Page
652
def generate_literature_survey_page():
    """
    Render the Streamlit page that builds a literature survey from an uploaded CSV.

    The page asks for a CSV file with the columns Title, Author(s), Abstract
    and Published, converts its rows to dicts and, when the button is pressed,
    passes them to ``generate_literature_survey`` and shows the result.

    Returns:
    None
    """
    st.title("📚 Generate Literature Survey")
    st.markdown(
        """
        Upload a list of research papers (with abstracts) and generate a comprehensive literature survey.
        The survey will identify themes, methodologies, key findings, and future research directions.
        """
    )

    # Input: Papers Data
    st.subheader("Upload Papers Data")
    uploaded_file = st.file_uploader("Upload a CSV file containing papers (with columns: Title, Author(s), Abstract, Published)", type=["csv"])

    if uploaded_file is None:
        st.info("Please upload a CSV file to proceed.")
        return

    # Only the parse itself is guarded, so later errors are not mislabelled
    # as CSV loading problems.
    try:
        papers_df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error loading CSV file: {e}")
        return

    # A tuple (not a set) keeps the column order in the error message stable.
    required_columns = ("Title", "Author(s)", "Abstract", "Published")
    if not set(required_columns).issubset(papers_df.columns):
        st.error(f"CSV file must contain the following columns: {', '.join(required_columns)}")
        return

    # Empty CSV cells are read as NaN (a float); replace them with "" so that
    # downstream string handling such as len(abstract) does not fail.
    cleaned_df = papers_df.astype(object).where(papers_df.notna(), "")
    papers = cleaned_df.to_dict(orient="records")
    st.success(f"Successfully loaded {len(papers)} papers.")

    # Button to Generate Literature Survey
    if st.button("Generate Literature Survey"):
        with st.spinner("Generating literature survey..."):
            survey = generate_literature_survey(papers)
            # generate_literature_survey reports problems through its return
            # value; these are the prefixes of its failure messages.
            failure_prefixes = ("Error:", "Failed to generate", "No papers found", "Cannot generate")
            if isinstance(survey, str) and survey.startswith(failure_prefixes):
                st.warning(survey)
            else:
                st.success("Literature Survey Generated!")
                st.markdown(survey)
692
+
693
# Render the literature survey page when this file is executed as a script.
# NOTE(review): this guard only calls the page function; it does not register
# the page with any navigation. It also sits above later definitions such as
# summarize_text, so the page is rendered before the rest of the module runs.
# Confirm this is the intended behaviour.
if __name__ == "__main__":
    generate_literature_survey_page()
696
 
697
  # Function to summarize text using the specified model
698
  def summarize_text(text):