File size: 2,514 Bytes
4fe6e77 51f1445 4fe6e77 8722edc 4fe6e77 8722edc 4fe6e77 8722edc 4fe6e77 2275cf7 4fe6e77 2275cf7 4fe6e77 2275cf7 4fe6e77 2275cf7 4fe6e77 2275cf7 4fe6e77 2275cf7 4fe6e77 2275cf7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import streamlit as st
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import AzureChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import AnalyzeDocumentChain
from PyPDF2 import PdfReader
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFLoader
import os
# Export the OpenAI API type and version as environment variables before the
# chat model is built (presumably read by the underlying client — confirm).
os.environ.update(
    {
        "OPENAI_API_TYPE": "azure",
        "OPENAI_API_VERSION": "2023-03-15-preview",
    }
)

# Chat model shared by the rest of the script.
llm = AzureChatOpenAI(model_name="gpt-35-turbo", deployment_name="esujnand")
st.title("Wipro CSRD AI 1")

# description text
st.write("Step 1: Summary of your selected section of CSRD... Sections in this are enviormental topic1, enviornamtal topic2 ")
st.write("Step 2: Ask your specfici questions regarding a CSRD disclosure requirments")

# pdf file upload
pdf_file = st.file_uploader("Upload file", type=["pdf"])

# Placeholder page count that bounds the slider while no document is loaded.
numberofpages = 100
if pdf_file is not None:
    # Streamlit re-executes the whole script on every widget interaction, so
    # the real page count must be derived on each run; a value assigned only
    # inside a button branch is lost again on the next rerun.
    numberofpages = len(PdfReader(pdf_file).pages)

if st.button("How many pages? "):
    if pdf_file is None:
        # Nothing to inspect yet; avoid handing None to PdfReader.
        st.warning("Please upload a PDF file first.")
    else:
        st.write("length is ", numberofpages)

if st.button("table of contents? "):
    if pdf_file is None:
        st.warning("Please upload a PDF file first.")
    else:
        reader = PdfReader(pdf_file)
        # The page shown here is the third page of the document (index 2);
        # shorter documents have no such page.
        if len(reader.pages) > 2:
            page = reader.pages[2].extract_text()
            st.write(page)
        else:
            st.warning("This document has fewer than 3 pages.")

# Page indices are zero-based, so the last valid start page is one less than
# the page count.
last_page = numberofpages - 1
if last_page >= 1:
    startpage = st.slider('Which section to look at', 0, last_page, 1)
else:
    # Zero- or one-page document: there is nothing to choose between, and a
    # slider whose bounds coincide cannot hold the default value of 1.
    startpage = 0
st.write("starting section page", startpage)

pagecount = st.slider('How many pages', 1, 5, 1)
st.write("pages to read", pagecount)
def extract_text_from_pdf():
    """Return the text of the currently selected page window of the upload.

    Reads the module-level ``pdf_file``, ``startpage`` and ``pagecount``
    values. The window ``[startpage, startpage + pagecount)`` is clipped to
    the document's length, so a start page near the end of the document
    yields the remaining pages instead of raising ``IndexError``.

    Returns:
        The extracted text of each page in the window, joined with single
        spaces. Empty when the window lies entirely past the last page.
    """
    reader = PdfReader(pdf_file)
    # Never index past the final page of the document.
    stop = min(startpage + pagecount, len(reader.pages))
    return " ".join(
        reader.pages[index].extract_text() for index in range(startpage, stop)
    )
def extract_text_from_pdf2():
    """Return the text of every page of the uploaded PDF as one string.

    Reads the module-level ``pdf_file``. Page texts are concatenated in
    document order with a single space between consecutive pages.
    """
    document = PdfReader(pdf_file)
    page_texts = []
    for pdf_page in document.pages:
        page_texts.append(pdf_page.extract_text())
    return " ".join(page_texts)
# Summarize the selected pages with a map-reduce summarization chain when the
# button is pressed.
if st.button("Summerize "):
    if pdf_file is None:
        # Without an upload there is nothing to read; report it instead of
        # failing inside PdfReader.
        st.warning("Please upload a PDF file first.")
    else:
        with st.spinner("Extracting Text..."):
            summary_chain = load_summarize_chain(llm, chain_type="map_reduce")
            summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain, verbose=True)
            text = extract_text_from_pdf()
        if text.strip():
            with st.spinner("Summarizing..."):
                result = summarize_document_chain.run(text)
            st.write(result)
        else:
            # Skip the model call when the selected pages yielded no text.
            st.warning("No text could be extracted from the selected pages.")
|