Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import pickle | |
| import torch | |
| from grobidmonkey import reader | |
| from transformers import pipeline | |
| from transformers import BartTokenizer, BartModel, BartForConditionalGeneration | |
| from transformers import T5Tokenizer, T5ForConditionalGeneration | |
| from document import Document | |
| from BartSE import BARTAutoEncoder | |
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return its path.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method (e.g. the value returned by
            ``st.file_uploader``).

    Returns:
        str: Path of the written file inside ``./uploads``.

    Raises:
        ValueError: If ``uploaded_file.name`` has no usable file-name part.
    """
    upload_dir = "./uploads"
    # The name comes from the client. Keep only its last path component so a
    # crafted value such as "../../app.py" cannot escape the uploads folder.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# --- Upload and parse the paper --------------------------------------------
st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')

col1, col2 = st.columns([3, 1])
with col1:
    # Only PDFs are accepted: the file is handed straight to the PDF parser.
    uploaded_file = st.file_uploader("Choose a file", type="pdf")
with col2:
    option = st.selectbox(
        'Select parsing method.',
        ('monkey', 'x2d', 'lxml'))

if uploaded_file is not None:
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")

    # The reader works on a file path, so the upload is written to disk first.
    saved_file_path = save_uploaded_file(uploaded_file)
    monkeyReader = reader.MonkeyReader(option)

    # Show the outline, one entry per line, prefixed by the supplied marker.
    outline = monkeyReader.readOutline(saved_file_path)
    for pre, _fill, node in outline:
        st.write(f"{pre}{node.name}")

    # read paper content
    essay = monkeyReader.readEssay(saved_file_path)

    # The model pipeline below is currently disabled; the slide texts are
    # loaded from a precomputed pickle further down instead.
    # NOTE(review): BARTAutoEncoder is given `summarizor` twice while
    # `expandor` is loaded but unused -- confirm before re-enabling.
    #
    # with st.status("Understanding paper..."):
    #     Barttokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
    #     summ_model_path = 'com3dian/Bart-large-paper2slides-summarizer'
    #     summarizor = BartForConditionalGeneration.from_pretrained(summ_model_path)
    #     exp_model_path = 'com3dian/Bart-large-paper2slides-expander'
    #     expandor = BartForConditionalGeneration.from_pretrained(exp_model_path)
    #     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #     BartSE = BARTAutoEncoder(summarizor, summarizor, device)
    #     del summarizor, expandor
    #     document = Document(essay, Barttokenizer)
    #     del Barttokenizer
    #     length = document.merge(25, 30, BartSE, device)
    # with st.status("Generating slides..."):
    #     summarizor = pipeline("summarization", model=summ_model_path, device = device)
    #     summ_text = summarizor(document.segmentation['text'], max_length=100, min_length=10, do_sample=False)
    #     summ_text = [text['summary_text'] for text in summ_text]
    #     for summ in summ_text:
    #         st.write(summ)

# NOTE: pickle can execute arbitrary code while loading. This is only safe as
# long as 'slides_text.pkl' is a trusted file shipped with the app; never point
# it at user-supplied data.
with open('slides_text.pkl', 'rb') as file:
    summ_text = pickle.load(file)
# Wrap slide text in a <div>; the result is passed to st.markdown as raw HTML.
def render_html(text):
    """Return *text* wrapped, unescaped, in a single ``<div>`` element."""
    return "<div>{}</div>".format(text)
# --- Slide pager -------------------------------------------------------------
# Streamlit re-runs the script on every interaction, so the current page and
# the editable slide texts are kept in st.session_state between runs.
if 'page_index' not in st.session_state:
    st.session_state.page_index = 0
if 'text_list' not in st.session_state:
    # Work on a copy so edits never modify the list loaded from disk.
    st.session_state.text_list = list(summ_text)
text_list = st.session_state.text_list


def turn_page(direction):
    """Move the current page by one step without leaving the slide range.

    Args:
        direction: ``"next"`` or ``"prev"``; any other value is ignored.
    """
    last_index = len(st.session_state.text_list) - 1
    if direction == "next" and st.session_state.page_index < last_index:
        st.session_state.page_index += 1
    elif direction == "prev" and st.session_state.page_index > 0:
        st.session_state.page_index -= 1


if not text_list:
    # Nothing to page through; indexing below would raise IndexError.
    st.info("No slides to display.")
else:
    # Guard against a stale index that points past the end of the list.
    st.session_state.page_index = min(st.session_state.page_index, len(text_list) - 1)
    page = st.session_state.page_index

    # Display page turner controls
    col1, col2, col3 = st.columns([1, 2, 1])
    with col1:
        st.button("Previous", on_click=turn_page, args=("prev",))
    with col3:
        st.button("Next", on_click=turn_page, args=("next",))
    with col2:
        st.write(f"Page {page + 1} of {len(text_list)}")

    # Display editable text box. The per-page key gives every slide its own
    # widget state, so switching pages always shows that slide's stored text.
    text = st.text_area("Edit Text", text_list[page], height=200, key=f"slide_text_{page}")

    # Display HTML box
    st.markdown(render_html(text), unsafe_allow_html=True)

    # Update list with edited text
    text_list[page] = text